/* LemonTree 
 * 
 * Copyright (c) 2012 Tom Michoel, Eric Bonnet 
 * 
 * LemonTree is free software, released under the terms of the GNU general
 * Public License (GPL) v2. See LICENSE file for details.  
 *
*/

package lemontree.modulenetwork;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.InputMismatchException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import lemontree.utils.SetOperation;
import lemontree.utils.XmlXomReader;

import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Elements;
import nu.xom.ParsingException;
import nu.xom.ValidityException;

import org.xml.sax.SAXParseException;

import lemontree.ganesh.GibbsSampler;
import lemontree.modulenetwork.Regulator;
import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;

/**
 * Main class holding all module network components and network level algorithms.
 *  
 * @author tomic
 *
 */

public class ModuleNetwork {

	public static enum dataType {
		RATIO, AFFY
	};

	/**
	 * Parameter settings
	 */
	HashMap<String, String> parameters;

	/**
	 * Genes in the data set. Array index corresponds to gene number. 
	 * See also {@link Gene}.
	 */
	public ArrayList<Gene> geneSet;

	/**
	 * Description of experimental conditions in the data set. Array index corresponds
	 * to column index in the data matrix.
	 */
	public ArrayList<Experiment> conditionSet;

	/**
	 * HashMap object of experiments identified by their name
	 */
	public HashMap<String, Experiment> conditionMap;

	/**
	 * List of modules.
	 */
	public ArrayList<Module> moduleSet;

	/**
	 * Different equally likely clusterings of genes into modules. 
	 */
	public ArrayList<ArrayList<Module>> moduleSets;

	/** 
	 * List of candidate regulators.
	 */
	public ArrayList<Gene> regulatorSet;

	/** 
	 * Module graph.
	 */
	public ModuleGraph moduleGraph;

	/**
	 * Number of genes in the data set.
	 */
	public int numGenes;

	/**
	 * Number of experimental conditions in the data set.
	 */
	public int numCond;

	/**
	 * Matrix of expression values. Rows are genes, columns are experiments
	 */
	public double[][] data;

	/**
	 * Map of genes to the module (identified by index) they belong to. 
	 */
	public HashMap<String, Integer> inModule;

	/**
	 * Map of genes to the modules they belong to, index mirrors moduleSets.
	 */
	public HashMap<String, ArrayList<Integer>> inModules;

	/**
	 * Map to find Gene objects by their name.
	 */
	public Map<String, Gene> geneMap;
	
	private HashMap<Integer, Module> moduleMap;

	/**
	 * Total Bayesian score of the module network.
	 */
	public double networkScore;

	/**
	 * Normal-gamma prior parameters to compute Bayesian score.
	 * Ordered in array as {&lambda;<sub>0</sub>, &mu;<sub>0</sub>, &alpha;<sub>0</sub>,
	 * &beta;<sub>0</sub>}.
	 */
	public double[] normalGammaPrior = new double[4];

	public dataType dataType;

	/**
	 * Average expression value for data matrix
	 */
	public double dataMean;

	/**
	 * Standard deviation for data matrix
	 */
	public double dataSigma;

	/**
	 * Minimum expression value for data matrix
	 */
	public double dataMin;

	/**
	 * Maximum expression value for data matrix
	 */
	public double dataMax;

	/**
	 * Average expression value for microRNA set
	 */
	public double mirMean;

	/**
	 * Average expression value for microRNA set
	 */
	public double mirSigma;

	/**
	 * List of microRNA genes
	 * 
	 */
	public ArrayList<Gene> mirSet;
	
	/**
	 * Use global mean or module mean for figures
	 */
	private boolean useGlobalMeanForFigures;
	
	private boolean useRegulatorMeanForFigures;

	/**
	 * Empty constructor
	 */
	public ModuleNetwork() {
	}

	/**
	 * Constructs a module network from an XML file.
	 * @see ModuleNetwork#fromXML
	 * @deprecated
	 * @param xmlFile name of input XML file, generated by (or conforming to) 
	 * {@link ModuleNetwork#toXML}.
	 */
	public ModuleNetwork(String xmlFile) {
		try {
			this.fromXML(xmlFile);
			this.setTestSplits();
		} catch (SAXParseException ex1) {
			System.out.println("SAXParseException");
		} catch (IOException ex2) {
			System.out.println("IOException");
		}

	}

	/**
	 * Constructs a module network from all XML files in a directory.
	 * This is used to compile the results of different independent Gibbs sampler 
	 * runs into one network. Make sure that xmlDir contains only XML files
	 * and that all of them are coming from the same data set.
	 * @deprecated
	 * @param xmlDir directory containing XML files
	 * @param xmlFileOut file to store the new module network
	 * 
	 * @author tomic
	 */
	public ModuleNetwork(String xmlDir, String xmlFileOut, boolean write) {
		// get all XML files in xmlDir
		File dir;
		
		if (xmlDir.endsWith("/"))
			dir = new File(xmlDir);
		else {
			xmlDir = xmlDir + "/";
			dir = new File(xmlDir);
		}

		File[] files = dir.listFiles();
		try {
			// initialize with 1st file
			this.fromXML(xmlDir + files[0].getName());

			// add other files
			for (int k = 1; k < files.length; k++) {
				this.fromXMLModSets(xmlDir + files[k].getName());
			}
			this.setTestSplits();
			// write output
			if (write)
				this.writeXML(xmlFileOut);
		} catch (SAXParseException ex1) {
			System.out.println("SAXParseException");
		} catch (IOException ex2) {
			System.out.println("IOException");
		}
	}

	
	/**
	 * Read module data only, from xml files, used when learning regulators has been split in different files.
	 * 
	 * @deprecated
	 * @param xmlDir directory where xml files are stored
	 * @param nonsense fake parameter
	 * @param notused fake parameter
	 * 
	 * @author erbon
	 */
	public ModuleNetwork(String xmlDir, String nonsense, String notused) {
		File dir;
		if (xmlDir.endsWith("/"))
			dir = new File(xmlDir);
		else {
			xmlDir = xmlDir + "/";
			dir = new File(xmlDir);
		}
		File[] files = dir.listFiles();
		
		// Read data and modules from the first xml file
		this.fromXMLDataMod(xmlDir + files[0].getName());
		
		// Create a map module number to module object
		HashMap<Integer,Module> id2mod = new HashMap<Integer,Module>();
		for (Module mod : this.moduleSet)
			id2mod.put(mod.number, mod);
		
		// Loop over all xml files to read hierarchical trees
		for (int i=0;i<files.length;i++){
			this.fromXMLTrees(xmlDir + files[i].getName(), id2mod);
		}
		this.setTestSplits();
	}

	/**
	 * Constructs an initial module network for a given data set and initial clustering 
	 * without any links between modules.
	 * 
	 * @deprecated
	 * @param dataDir directory where the data set and initial clustering are located.
	 * @param datasetFile data set file conforming to the format explained in 
	 * <code>LeMoNe.properties</code>.
	 * @param numCluster number of initial clusters.
	 * @param clusterFile file with cluster memberships for each gene conforming to the 
	 * format explained in <code>LeMoNe.properties</code>.
	 * @param regulatorsFile file with candidate regulators.
	 * @param lambda0 normal-gamma prior parameter
	 * @param mu0 normal-gamma prior parameter
	 * @param alpha0 normal-gamma prior parameter
	 * @param beta0 normal-gamma prior parameter
	 * 
	 * @author tomic, erbon, stmae
	 */
//	public ModuleNetwork(String dataDir, String datasetFile, int numCluster, String clusterFile, String regulatorsFile, double lambda0, double mu0, double alpha0, double beta0, HashMap<String, String> param) {
//
//		// normal Gamma priors
//		normalGammaPrior[0] = lambda0;
//		normalGammaPrior[1] = mu0;
//		normalGammaPrior[2] = alpha0;
//		normalGammaPrior[3] = beta0;
//
//		this.inModule = new HashMap<String, Integer>();
//		this.readExpressionMatrix(datasetFile);
//		this.readClusters(dataDir, numCluster, clusterFile);
//		this.readRegulators(dataDir, regulatorsFile);
//
//		// compute initial statistics and score
//		ArrayList<Integer> allConds = new ArrayList<Integer>();
//		for (int i = 0; i < this.numCond; i++)
//			allConds.add(i);
//		for (Module module : this.moduleSet) {
//			module.hierarchicalTree = new TreeNode(module, this.normalGammaPrior);
//			module.hierarchicalTree.leafDistribution.condSet = allConds;
//			module.hierarchicalTree.statistics();
//			module.moduleScore = module.hierarchicalTree.bayesianScore();
//			module.moduleNetwork = this;
//		}
//		// calculate the score of the initial network
//		this.bayesianScore();
//
//		//this.moduleGraph = new ModuleGraph(this);
//
//		double nrDataPoints = 0;
//		double sumSquare = 0.0;
//		this.dataMean = 0.0;
//		this.dataSigma = 0.0;
//		this.dataMin = Double.POSITIVE_INFINITY;
//		this.dataMax = Double.NEGATIVE_INFINITY;
//		for (int i = 0; i < this.data.length; i++) {
//			for (int j = 0; j < this.data[i].length; j++) {
//				if (!Double.isNaN(data[i][j])) {
//					this.dataMean += data[i][j];
//					sumSquare += Math.pow(data[i][j], 2);
//					nrDataPoints += 1;
//					if (data[i][j] < this.dataMin)
//						this.dataMin = data[i][j];
//					else if (data[i][j] > this.dataMax)
//						this.dataMax = data[i][j];
//				}
//			}
//		}
//
//		this.dataMean /= (double) nrDataPoints;
//		this.dataSigma = Math.sqrt(sumSquare - nrDataPoints * Math.pow(this.dataMean, 2)) / Math.sqrt((double) nrDataPoints);
//		System.out.println("dataMean " + dataMean);
//		System.out.println("dataSigma " + dataSigma);
//
//		// default, but should be handled more cleanly
//		this.dataType = dataType.RATIO;
//
//		// parameter settings
//		this.parameters = param;
//	}
	
	/**
	 * Constructs an initial empty module network for a given data set. To be used in 
	 * conjunction with {@link ModuleNetwork#gibbsSampleGenes}.
	 * 
	 * @deprecated
	 * @param dataDir directory where the data set and initial clustering are located.
	 * @param datasetFile data set file conforming to the format explained in 
	 * <code>LeMoNe.properties</code>.
	 * @param regulatorsFile file with candidate regulators.
	 * @param lambda0 normal-gamma prior parameter
	 * @param mu0 normal-gamma prior parameter
	 * @param alpha0 normal-gamma prior parameter
	 * @param beta0 normal-gamma prior parameter
	 * 
	 * @author tomic
	 */
//	public ModuleNetwork(String dataDir, String datasetFile, String regulatorsFile, double lambda0, double mu0, double alpha0, double beta0, HashMap<String, String> param) {
//		// normal Gamma priors
//		normalGammaPrior[0] = lambda0;
//		normalGammaPrior[1] = mu0;
//		normalGammaPrior[2] = alpha0;
//		normalGammaPrior[3] = beta0;
//		// read data and candidate regulators
//		this.readExpressionMatrix(dataDir, datasetFile);
//		this.readRegulators(dataDir, regulatorsFile);
//		this.moduleSet = new ArrayList<Module>();
//		this.moduleSets = new ArrayList<ArrayList<Module>>();
//		// parameter settings
//		this.parameters = param;
//		// default, but should be handled more cleanly
//		this.dataType = dataType.RATIO;
//
//	}


	/**
	 * Reads the expression data and conditions from a tab-delimited text file.
	 * 
	 * @param dataFile expression data file name, with header. 
	 * 
	 * @author tomic, erbon
	 */
	public void readExpressionMatrix(String dataFile, String geneFile) {
		
		System.out.print("Reading expression data...");
		HashSet<String> geneList = new HashSet<String>();
		
		// if a gene file is given, load the genes in a hashset		
		try {

			if (geneFile !=null) {
				BufferedReader buff = new BufferedReader(new FileReader(new File(geneFile)));
				String line;
				while ((line = buff.readLine()) != null) {
					line.trim();
					if (line.length()>0 && !line.startsWith("#"))
						geneList.add(line);
				}
			}
		} 
		catch (FileNotFoundException e) {
			System.out.println();
			System.out.println("Error: file "+geneFile+" not found.");
			System.exit(1);
		}
		catch (IOException e) {
			System.out.println();
			System.out.println("Error: can't read "+geneFile+" file.");
			System.exit(1);
		}
		
		
		try {
			// read data file
			BufferedReader buff = new BufferedReader(new FileReader(new File(dataFile)));
			// read first line and set the number of conditions and experiments
			String line = buff.readLine();
			line.trim();
			String[] tokens = line.split("\\t|\\s+");
			this.numCond = tokens.length - 1;
			this.conditionSet = new ArrayList<Experiment>(numCond);
			for (int i = 1; i < tokens.length; i++) {
				this.conditionSet.add(new Experiment(tokens[i], i - 1));
			}
			// read all remaining lines in a list
			ArrayList<String> geneLines = new ArrayList<String>();
			int numberOfGenes = 0;
			while ((line = buff.readLine()) != null) {
				if (line.length()>0) {
					geneLines.add(line.trim());
					numberOfGenes++;
				}
			}
			// set the number of genes, data matrix size and gene set
			if (geneList.size()>0)
				this.numGenes=geneList.size();
			else
				this.numGenes = numberOfGenes;
			
			this.data = new double[numGenes][numCond];
			this.geneSet = new ArrayList<Gene>(numGenes);
	
			/*
			 * process all lines and set genes and expression values.
			 * if a gene list was given, add only the genes that are in the list.
			 */
			int numMissing = 0;
			for (int i = 0; i < geneLines.size(); i++) {
				
				String[] val = geneLines.get(i).split("\\t|\\s+");
				
				if (geneList.size()>0) {
					if (geneList.contains(val[0])) {
						this.geneSet.add(new Gene(val[0].trim(), "", i));
						for (int j = 1; j < val.length; j++) {
							try {
								this.data[i][j - 1] = new Double(val[j].trim()).doubleValue();
								if (Double.isNaN(data[i][j-1]))
									numMissing++;
							} catch (NumberFormatException e) {
								data[i][j - 1] = Double.NaN;
								numMissing++;
							}
						}
					}
				}
				else {
					this.geneSet.add(new Gene(val[0].trim(), "", i));
					for (int j = 1; j < val.length; j++) {
						try {
							this.data[i][j - 1] = new Double(val[j].trim()).doubleValue();
							if (Double.isNaN(data[i][j-1]))
								numMissing++;
						} catch (NumberFormatException e) {
							data[i][j - 1] = Double.NaN;
							numMissing++;
						}
					}
				}
			}
			
			System.out.println(" [ok]");
			if (geneList.size()>0)
				System.out.println("Number of genes in the gene list: "+geneList.size());
			System.out.println("Number of genes in the data matrix: "+numGenes);
			System.out.println("Number of conditions in the data matrix: "+numCond);
			System.out.println("Number of missing values in the data matrix: "+numMissing);
			System.out.println("Total number of expression values: "+(numGenes * numCond));
			
			//initialize module set objects
			this.moduleSet = new ArrayList<Module>();
			this.moduleSets = new ArrayList<ArrayList<Module>>();

		} 
		catch (FileNotFoundException e) {
			System.out.println();
			System.out.println("Error: file "+dataFile+" not found.");
			System.exit(1);
		}
		catch (IOException e) {
			System.out.println();
			System.out.println("Error: can't read "+dataFile+" file.");
			System.exit(1);
		}

	}

	/**
	 * Reads an initial clustering of the genes.
	 * 
	 * @param dataDir directory where the data set and initial clustering are located.
	 * @param numCluster number of initial clusters.
	 * @param clusterFile file with cluster memberships for each gene 
	 * 
	 * @author tomic, erbon, stmae
	 */
	public void readClusters(String clusterFile) {
		
		System.out.println("Reading cluster file "+clusterFile);
		// read all lines in a list
		ArrayList<String> allLines = new ArrayList<String>();
		try {
			BufferedReader input = new BufferedReader(new FileReader(new File(clusterFile)));
			String line;
			while ((line = input.readLine()) != null) {
				if (!line.startsWith("#"))
					allLines.add(line.trim());
			}
		}
		catch (FileNotFoundException e) {
			System.out.println("Error: file "+clusterFile+" not found.");
			System.exit(1);
		}
		catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		}
		
		// determine the number of clusters from the IDs
		int ctr, numCluster=0;
		for (String l : allLines) {
			// split on either tab or spaces
			String[] tk = l.split("\\s+|\\t");
			ctr = Integer.parseInt(tk[1])+1;
			if (ctr>numCluster)
				numCluster = ctr;
		}

		// create moduleSet objects and assign genes to modules
		int assigned=0;
		int notassigned=0;
		this.moduleSet = new ArrayList<Module>();
		this.moduleMap = new HashMap<Integer, Module>();
		for (int i = 0; i < numCluster; i++) {
			Module mo = new Module(this, i, this.normalGammaPrior);
			this.moduleSet.add(mo);
			this.moduleMap.put(i, mo);
		}
		moduleSets.add(moduleSet);
		
		// create hashmap to find genes in geneSet by their name
		geneMap = new HashMap<String, Gene>();
		for (Gene gene : this.geneSet)
			geneMap.put(gene.name, gene);
		this.inModule = new HashMap<String, Integer>();
		int first = 1;
		for (String l : allLines) {
			String[] tk = l.split("\\s+|\t");
			String gene_name = tk[0];
			int cluster_number = Integer.parseInt(tk[1]);
			if (cluster_number == 0) {
				first = 0;
			}
			// set module of gene
			if (geneMap.containsKey(gene_name)) {
				assigned++;
				inModule.put(gene_name, cluster_number - first);
				moduleSet.get(cluster_number - first).genes.add(geneMap.get(gene_name));
			} else {
				notassigned++;
			}
		}
		System.out.println(" [ok]");
		System.out.println("Assigned genes: "+assigned);
		System.out.println("Not assigned genes: "+notassigned);
		System.out.println("Number of clusters: "+numCluster);
	}

	/**
	 * Read multiple cluster files in a moduleSets object.
	 * 
	 * @param fileName text file containing a list of cluster files.
	 */
	public void readMultipleClusters(String fileName) {
		System.out.println("Reading multiple cluster files...");
		
		// read file names from a list
		ArrayList<String> clusterFiles = new ArrayList<String>(); 
		try {
			BufferedReader buf = new BufferedReader(new FileReader(fileName));
			String line;
			while((line = buf.readLine()) != null) {
				line.trim();
				clusterFiles.add(line);
			}
		}
		catch (FileNotFoundException e){
			System.out.println("Error: file "+fileName+" not found.");
			System.exit(1);
		}
		catch (IOException e ) {
			System.out.println("Error: IOException.");
			e.printStackTrace();
			System.exit(1);
		}
		
		/*
		 * Read multiple cluster files using the readClusters function.
		 * Cluster files will be added as moduleSet objects in the moduleSets field.
		 * (!!!) The objects moduleSet, moduleMap, geneMap and inModule will point only to the last moduleSet.
		 * 
		 */
		for (String fn : clusterFiles) {
			readClusters(fn);
		}
		
		System.out.println("[ok]");
		
	}

	/**
	 * Write clusters pointed by moduleSet to a text file.
	 * 
	 * @param fileName output file name
	 * @author eric
	 */
	public void writeClusters(String fileName) {
		System.out.print("Writing clusters to text file "+fileName);
		try {
			PrintWriter out = new PrintWriter(new FileWriter(fileName));
			for (Module mod : moduleSet) {
				int modNumber = mod.number;
				for (Gene g : mod.genes) {
					out.println(g.name+"\t"+modNumber);
				}
			}
			out.close();
			System.out.println(" [ok]");
		}
		catch (IOException e) {
			System.out.println("Error: IO Exception.");
			e.printStackTrace();
			System.exit(1);
		}
	}
	
	
	/**
	 * Reads the set of candidate regulators.
	 * 
	 * @param dataDir directory where the data set and initial clustering are located.
	 * @param regulatorsFile file with candidate regulators.
	 * 
	 * @author tomic, erbon, stmae
	 */
	public void readRegulators(String regulatorsFile) {
		System.out.print("Reading candidate regulators...");
		int assigned = 0;
		int assignedDiscrete = 0;
		int notassigned = 0;
		regulatorSet = new ArrayList<Gene>();
		// create hashmap to find genes in geneSet by their name
		Map<String, Gene> geneMap = new HashMap<String, Gene>();
		for (Gene gene : this.geneSet)
			geneMap.put(gene.name, gene);
		
		if (regulatorsFile != null) {
			try {
				// open file
				BufferedReader input = new BufferedReader(new FileReader(new File(regulatorsFile)));
				String line;
				while ((line = input.readLine()) != null) {
					line.trim();
					// skip comment lines
					if (!line.startsWith("#")) {

						// split on tabs
						String tk[] = line.split("\\t");

						// 2 columns: 1st = gene name 2nd = type (continuous or discrete)
						if (tk.length > 1) {

							String gene_name = tk[0];
							String type = tk[1];

							// test if gene belongs to the gene set
							if (geneMap.containsKey(gene_name)) {

								// add gene to the regulatorset
								regulatorSet.add(geneMap.get(gene_name));

								// set split values
								geneMap.get(gene_name).possibleSplitValues = new ArrayList<Double>();
								for (int i = 0; i < data[geneMap.get(gene_name).number].length; i++) {
									geneMap.get(gene_name).possibleSplitValues.add(new Double(data[geneMap.get(gene_name).number][i]));
								}

								// test if the regulator is continuous or discrete
								if (type.equalsIgnoreCase("d") == true) {
									geneMap.get(gene_name).setDiscrete(true);
									assignedDiscrete++;
								}

								assigned++;
							}
							else 
								notassigned++;
						}
						else {
							// one column only, assumes that all of them are of continuous type (default type) 
							String gene_name = tk[0];
							if (geneMap.containsKey(gene_name)) {
								regulatorSet.add(geneMap.get(gene_name));
								geneMap.get(gene_name).possibleSplitValues = new ArrayList<Double>();
								for (int i = 0; i < data[geneMap.get(gene_name).number].length; i++) {
									geneMap.get(gene_name).possibleSplitValues.add(new Double(data[geneMap.get(gene_name).number][i]));
								}

								assigned++;
							}
							else 
								notassigned++;
						}
					}
				}
				System.out.println(" [ok]");
				System.out.println("Found " + assigned + " regulators belonging to the geneset.");
				System.out.println(assignedDiscrete + " of them are discrete");
				System.out.println(notassigned + " regulators do not belong to the gene set");
			}
			catch (FileNotFoundException e) {
				System.out.println("Error: File " + regulatorsFile + " not found.");
				System.exit(1);
			}
			catch (IOException e) {
				System.out.println("Error while reading file " + regulatorsFile);
				e.printStackTrace();
				System.exit(1);
			}
		}
	}

	/**
	 * Reads a list of genes of interest.
	 * 
	 * @param dataDir directory 
	 * @param geneFile file with gene names.
	 * 
	 * @author tomic
	 */
	public ArrayList<Gene> readGenes(String dataDir, String geneFile) {
		System.out.println("Reading genes ...");
		int assigned = 0;
		int notassigned = 0;
		ArrayList<Gene> geneList = new ArrayList<Gene>();
		// create hashmap to find genes in geneSet by their name
		Map<String, Gene> geneMap = new HashMap<String, Gene>();
		for (Gene gene : this.geneSet)
			geneMap.put(gene.name, gene);
		try {
			Scanner geneScanner = new Scanner(new File(dataDir, geneFile)).useDelimiter("\\n");
			// walk through file, we only collect the first column (assuming
			// DESCRIPTION is already set in the datafile)
			while (geneScanner.hasNext()) {
				Scanner line = new Scanner(geneScanner.next().toString()).useDelimiter("\\s");
				// read gene name
				String name = line.next();
				if (!name.startsWith("#")) { // skip comment lines
					// add gene to regulator set
					if (geneMap.containsKey(name)) {
						geneList.add(geneMap.get(name));
						assigned++;
					} else
						notassigned++;
				}
			}
			System.out.println("... found " + assigned + " genes belonging to the gene set, and " + notassigned + " not belonging to the gene set.");
			System.out.println();
		} catch (FileNotFoundException e) {
			System.out.println("Gene file not found.");
		}
		return geneList;
	}

	/**
	 * Does not perform real score computation, but only sums the different module scores.
	 * 
	 * @author tomic
	 */
	public void bayesianScore() {
		double score = 0.0;
		for (Module module : moduleSet)
			score = score + module.moduleScore;
		networkScore = score;
	}

	/**
	 * Converts a list of modules to a partition of the gene indices.
	 * @param modSet
	 * @return
	 * 
	 * @author tomic
	 */
	public ArrayList<ArrayList<Integer>> moduleSet2Partition(ArrayList<Module> modSet) {
		ArrayList<ArrayList<Integer>> partition = new ArrayList<ArrayList<Integer>>();
		for (Module mod : modSet) {
			ArrayList<Integer> list = new ArrayList<Integer>();
			for (Gene gene : mod.genes)
				list.add(gene.number);
			partition.add(list);
		}
		return partition;
	}

	/**
	 * Make sets of genes that are clustered together in all module sets in 
	 * {@link ModuleNetwork#moduleSets}
	 * @param minSize
	 * @return
	 * 
	 * @author tomic
	 */
	public ArrayList<ArrayList<Integer>> makeConsensusGeneSets(int minSize) {
		if (this.moduleSets.size() > 1) {
			// initialize
			ArrayList<ArrayList<Integer>> consensus = this.moduleSet2Partition(this.moduleSets.get(0));
			System.out.println(consensus.size());
			// take intersection with remaining module sets
			for (int k = 1; k < this.moduleSets.size(); k++) {
				consensus = SetOperation.prodPart(consensus, this.moduleSet2Partition(this.moduleSets.get(k)));
				System.out.println(consensus.size());
			}
			// remove sets that are too small
			ArrayList<ArrayList<Integer>> remove = new ArrayList<ArrayList<Integer>>();
			for (ArrayList<Integer> set : consensus)
				if (set.size() < minSize)
					remove.add(set);
			for (ArrayList<Integer> set : remove)
				consensus.remove(set);
			System.out.println(consensus.size());
			return consensus;
		} else {
			return this.moduleSet2Partition(this.moduleSet);
		}
	}

	/**
	 * Sets the map of genes to modules
	 * 
	 * @author tomic
	 */
	public void setInModule() {
		this.inModule = new HashMap<String, Integer>();
		for (Module mod : this.moduleSet) {
			for (Gene gene : mod.genes) {
				this.inModule.put(gene.name, mod.number);
			}
		}
	}

	/**
	 * Sets the map of genes to modules
	 * 
	 * @author tomic
	 *
	 */
	public void setInModules() {
		// initialize
		this.inModules = new HashMap<String, ArrayList<Integer>>();
		for (Gene gene : this.geneSet)
			this.inModules.put(gene.name, new ArrayList<Integer>(this.moduleSets.size()));
		// loop over module sets
		for (int k = 0; k < this.moduleSets.size(); k++) {
			//System.out.println(k);
			for (Module mod : this.moduleSets.get(k)) {
				for (Gene gene : mod.genes) {
					this.inModules.get(gene.name).add(mod.number);
				}
			}
		}
	}

	/**
	 * 
	 * @param outFile
	 * @author tomic
	 */
	public void printInModules(String outFile) {
		this.setInModules();
		try {
			File f = new File(outFile);
			PrintWriter pw = new PrintWriter(f);
			for (Gene gene : this.geneSet) {
				String s = String.format("%s\t%d", gene.name, gene.number); //String.format("%.5s", gene.name) + "  \t" + gene.number;
				for (int m : this.inModules.get(gene.name))
					s += "\t" + m;
				pw.println(s);
			}
			pw.close();
		} catch (IOException e) {
			System.out.println(e);
		}
	}

	/**
	 * Reassigns genes to other modules based on the current partitioning of the 
	 * experiments in each module. All single gene moves which improve the Bayesian
	 * score are made.
	 * 
	 * @param cutoff only consider moves with score improvement higher than cutoff.
	 * @return number of moves made.
	 * @author erbon, tomic
	 */
	public int moduleReassign(double cutoff) {
		this.setInModule();
		// count number of moves
		int numMoves = 0;
		for (Gene gene : geneSet) {
			// loop over all modules
			for (Module module : moduleSet) {
				// current module for gene
				Module current_module = moduleSet.get(inModule.get(gene.name));
				// check if one of the modules was changed
				if (module.changed || current_module.changed) {
					// reaffect gene to new module
					if (!module.equals(current_module)) {
						// current score (relevant part)
						double current_score = current_module.moduleScore + module.moduleScore;
						// for each leaf of the trees of current_module and
						// module: make a copy of statistics
						current_module.hierarchicalTree.copyStatisticsTmp();
						module.hierarchicalTree.copyStatisticsTmp();
						// compute statistics and score for hypothetical move,
						current_module.hierarchicalTree.statisticsRemoveGeneTmp(gene);
						module.hierarchicalTree.statisticsAddGeneTmp(gene);
						double new_score_current = current_module.hierarchicalTree.bayesianScoreTmp();
						double new_score_m = module.hierarchicalTree.bayesianScoreTmp();
						// make move only if the score is improved
						if ((new_score_current + new_score_m - current_score) / numGenes > cutoff) {
							System.out.println(gene.name + "\t" + current_module.number + "\t" + module.number);
							numMoves++;
							inModule.put(gene.name, module.number);
							// gene.module = module;
							module.genes.add(gene);
							current_module.genes.remove(gene);
							// copy temporary statistics
							current_module.hierarchicalTree.copyStatistics();
							module.hierarchicalTree.copyStatistics();
							// update network score (before updating module scores)
							networkScore = networkScore - current_module.moduleScore - module.moduleScore + new_score_current + new_score_m;
							// set module scores
							current_module.moduleScore = new_score_current;
							module.moduleScore = new_score_m;
						}
					}
				}
			}
		}
		return numMoves;
	}

	/**
	 * Makes a single consensus tree out of different experiment partitions
	 * 
	 * @param cutLevel where to cut trees
	 * @param minSizeExpt minimal number of experiments in a leaf
	 * @param multiplication factor in split significance test
	 * 
	 * @author tomic
	 */
	public void makeConsensusTrees(int cutLevel, int minSizeExpt, double signiFact) {
		System.out.println("\nMaking consensus trees ...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			System.out.println("... module set " + i);
			for (Module mod : this.moduleSets.get(i)) {
				for (TreeNode root : mod.hierarchicalTrees)
					root.testLevel(cutLevel);
				mod.makeConsensusTree(minSizeExpt);
				mod.hierarchicalTree = mod.findSignificantSplits(mod.hierarchicalTree.getLeafNodes(), signiFact);
				mod.hierarchicalTrees = new ArrayList<TreeNode>();
			}
		}
	}

	/**
	 * Assigns regulators to nodes in the hierarchical trees with acyclicity. 
	 * This method considers one set of modules and one hierarchical tree per module, 
	 * and assigns regulators first to the roots of the trees, then to the children
	 * of the roots, etc. It always assigns the regulator with lowest entropy that
	 * does not violate acyclicity of the current network. 
	 * 
	 * @see TreeNode#findRegulator(HashSet).
	 * 
	 * @author tomic
	 */
	public void assignRegulators() {
		System.out.println("\nAssigning regulators ...");
		// list of treenodes and the modules they belong to
		HashMap<TreeNode, Module> trees = new HashMap<TreeNode, Module>();
		// for each module: list of regulators that we can add to it
		HashMap<Module, ArrayList<Gene>> modRegs = new HashMap<Module, ArrayList<Gene>>();
		// maximal tree depth
		int maxdepth = 0;
		// reset module graph
		this.moduleGraph = new ModuleGraph(this);

		for (Module mod : moduleSet)
			if (mod.hierarchicalTree.treeDepth() > maxdepth)
				maxdepth = mod.hierarchicalTree.treeDepth();
		System.out.println("... max depth = " + maxdepth);

		// for each module, initialize regulation tree, add root to trees, and
		// find best candidate regulator
		System.out.println("... finding candidate regulators for roots");
		for (Module mod : moduleSet) {
			mod.parents = new ArrayList<Gene>();
			if (mod.hierarchicalTree.nodeStatus.equals("internal")) {
				trees.put(mod.hierarchicalTree, mod);
				ArrayList<Gene> regSet = new ArrayList<Gene>();
				for (Gene reg : this.regulatorSet)
					if (!moduleGraph.testCycle(reg, mod))
						regSet.add(reg);
				modRegs.put(mod, regSet);
				mod.hierarchicalTree.findRegulatorBayes(regSet, 3.0);
			}
		}

		// loop as long as there are nodes to be filled
		for (int l = 0; l < maxdepth; l++) {
			System.out.println("... assigning level " + l);
			if (l > 0) { // create new trees for all but rootlevel (l=0)
				for (Module mod : moduleSet) {
					// find internal nodes at level l, they only exist if tree has depth >= l
					int depth = mod.hierarchicalTree.treeDepth();
					if (depth > l) {
						// check which regulators are still valid
						HashSet<Gene> removeRegs = new HashSet<Gene>();
						for (Gene reg : modRegs.get(mod))
							if (moduleGraph.testCycle(reg, mod))
								removeRegs.add(reg);
						// remove the ones that are not
						for (Gene reg : removeRegs)
							modRegs.get(mod).remove(reg);
						ArrayList<TreeNode> intern = new ArrayList<TreeNode>();
						mod.hierarchicalTree.gatherInternals(intern);
						for (TreeNode node : intern)
							if (node.treeLevel() == l) { // nodes at level l
								// if modRegs empty, node becomes leaf
								if (modRegs.get(mod).isEmpty()) {
									node.nodeStatus = "leaf";
									node.leftChild = null;
									node.rightChild = null;
									node.regulationSplit = new Split();
								} else { // put node in trees and find candidate regulator
									trees.put(node, mod);
									node.findRegulatorBayes(modRegs.get(mod), 3.0);
								}
							}
					}
				}
			}
			while (trees.size() > 0) {
				// find the regulator with highest score
				double max = Double.NEGATIVE_INFINITY;
				TreeNode bestnode = new TreeNode();
				Module bestmod = new Module();
				for (TreeNode node : trees.keySet()) {
					if (node.regulationSplit.regulatorScore > max) {
						max = node.regulationSplit.regulatorScore;
						bestnode = node;
						bestmod = trees.get(node);
					}
				}
				// best one is added to moduleGraph and parent set
				moduleGraph.addRegulator(bestnode.regulationSplit.regulator, bestmod);
				bestmod.parents.add(bestnode.regulationSplit.regulator);
				modRegs.get(bestmod).remove(bestnode.regulationSplit.regulator);
				// update other nodes
				HashSet<TreeNode> removeNodes = new HashSet<TreeNode>();
				for (TreeNode node : trees.keySet()) {
					if (!node.equals(bestnode)) {
						// check which regulators are still valid
						HashSet<Gene> removeRegs = new HashSet<Gene>();
						for (Gene reg : modRegs.get(trees.get(node)))
							if (moduleGraph.testCycle(reg, trees.get(node)))
								removeRegs.add(reg);
						// remove the ones that are not
						for (Gene reg : removeRegs)
							modRegs.get(trees.get(node)).remove(reg);
						// if modRegs empty, set dummy reg
						if (modRegs.get(trees.get(node)).isEmpty()) {
							//System.out.println("empty regset");
							node.nodeStatus = "leaf";
							node.leftChild = null;
							node.rightChild = null;
							node.regulationSplit = new Split();
							removeNodes.add(node);
						} else if (!modRegs.get(trees.get(node)).contains(node.regulationSplit.regulator)) {
							// check if current best reg still in regset, if not find new reg
							node.findRegulatorBayes(modRegs.get(trees.get(node)), 3.0);
						}
					}
				}
				// remove bestnode and removeNodes from trees
				trees.remove(bestnode);
				for (TreeNode node : removeNodes)
					trees.remove(node);
			}
		}
		System.out.println("... done.");
	}

	/**
	 * Assigns regulators to nodes in the hierarchical trees without 
	 * acyclicity constraints. This method can deal with multiple
	 * trees per module and assigns to each node the regulator with lowest entropy. 
	 * 
	 * @see TreeNode#findRegulator(HashSet)
	 * @author tomic
	 */
	public void assignRegulatorsNoAcycl() {
		System.out.println("\nAssigning regulators ...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (ArrayList<Module> moduleSet : this.moduleSets) {
			for (Module mod : moduleSet) {
				// check for multiple trees
				boolean oneTree = false;
				if (mod.hierarchicalTrees.isEmpty()) {
					oneTree = true;
					mod.hierarchicalTrees.add(mod.hierarchicalTree);
				}
				for (TreeNode root : mod.hierarchicalTrees) {
					for (TreeNode node : root.getInternalNodes()) {
						node.findRegulatorBayes(this.regulatorSet, 3.0);
						if (oneTree)
							mod.parents.add(node.regulationSplit.regulator);
					}
				}
				// set parent weights if multiple trees
				// default parameters ...
				if (!oneTree) {
					mod.setRegulatorWeights();
					//    				mod.setRegulatorWeightsMax(0, 0.5);
					//    				mod.setRegulatorWeightsSum(0, 0.5);
				}
			}
		}
		System.out.println("... done.");
	}

	/**
	 * Stochastically assigns regulators to nodes in the hierarchical trees
	 * without acyclicity constraints. This method can deal with multiple 
	 * trees per module and assigns to each node a number of regulators with low entropy 
	 * according to a distribution on the entropy values of all regulators for that node. 
	 *
	 * @see TreeNode#findRegulatorStoch(int, HashSet, double).
	 * @param betaMax initial parameter for conditional distribution of
	 * tree direction (left/right) given regulator expression (up/down).
	 * @param numRegAssign number of regulators assigned to each node.
	 * 
	 * @author tomic
	 */
	public void assignRegulatorsNoAcyclStoch(double betaMax, int numRegAssign) {
		System.out.println("Assigning regulators stochastically ...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			ArrayList<Module> modset = this.moduleSets.get(i);
			for (Module mod : modset) {
				if (!mod.genes.isEmpty()) {
					System.out.println("module: " + mod.number);
					// check for multiple trees
					boolean oneTree = false;
					if (mod.hierarchicalTrees.isEmpty()) {
						oneTree = true;
						mod.hierarchicalTrees.add(mod.hierarchicalTree);
					}
					for (TreeNode root : mod.hierarchicalTrees) {
						for (TreeNode node : root.getInternalNodes()) {
							node.findRegulatorBayesStoch(numRegAssign, this.regulatorSet, betaMax);
							if (oneTree)
								mod.parents.add(node.regulationSplit.regulator);
						}
					}
					// set parents if multiple trees
					// default parameters ...
					if (!oneTree) {
						mod.setRegulatorWeights();
						mod.setRegulatorWeightsRandom();
					}
				}
			}
		}
		System.out.println("[ok]");
	}

	/**
	 * Stochastically assigns regulators to nodes in the hierarchical trees down
	 * to a certain tree level without acyclicity constraints. This method can deal with multiple 
	 * trees per module and assigns to each node a number of regulators with low entropy 
	 * according to a distribution on the entropy values of all regulators for that node. 
	 *
	 * @see TreeNode#findRegulatorStoch(int, HashSet, double).
	 * @param betaMax initial parameter for conditional distribution of
	 * tree direction (left/right) given regulator expression (up/down).
	 * @param numRegAssign number of regulators assigned to each node.
	 * 
	 * @author tomic
	 */
	public void assignRegulatorsNoAcyclStoch(int level, double betaMax, int numRegAssign) {
		System.out.println("\nAssigning regulators stochastically down to level " + level + " ...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			ArrayList<Module> modset = this.moduleSets.get(i);
			System.out.println("... module set " + i + " (" + modset.size() + " modules)");
			for (Module mod : modset) {
				if (!mod.genes.isEmpty()) {
					// check for multiple trees
					if (mod.hierarchicalTrees.isEmpty()) {
						mod.hierarchicalTrees.add(mod.hierarchicalTree);
					}
					for (TreeNode root : mod.hierarchicalTrees) {
						// trim root below level
						root.testLevel(level);
						// assign regulators to remaining nodes
						for (TreeNode node : root.getInternalNodes()) {
							node.findRegulatorBayesStoch(numRegAssign, this.regulatorSet, betaMax);
						}
					}
					// set parents, default parameter ...
					mod.setRegulatorWeights();
					//      				mod.setRegulatorWeightsMax(0, 0.5);
					//      				mod.setRegulatorWeightsSum(0, 0.5);
				}
			}
		}
		System.out.println("... done.");
	}
	
	/**
	 * Stochastically assigns regulators to nodes in the hierarchical trees
	 * without acyclicity constraints. 
	 * 
	 * In this method, the assignment is done for a specific range of modules within the module set.
	 * This allow to run jobs in parallel.
	 *
	 * @param betaMax initial parameter for conditional distribution of
	 * tree direction (left/right) given regulator expression (up/down).
	 * @param numRegAssign number of regulators assigned to each node.
	 * @param moduleStart module number to start for the assignment.
	 * @param ModuleStop module number to stop for the assignment.
	 * 
	 * @author tomic
	 * @author erbon
	 */
	public void assignRegulatorsNoAcyclStoch(double betaMax, int numRegAssign, int moduleStart, int moduleStop) {
		System.out.println("Assigning regulators stochastically...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			ArrayList<Module> modset = this.moduleSets.get(i);
			for (int num=moduleStart; num <= moduleStop; num++) {
				Module mod = modset.get(num);
				if (!mod.genes.isEmpty()) {
					System.out.println("module: " + mod.number);
					// check for multiple trees
					boolean oneTree = false;
					if (mod.hierarchicalTrees.isEmpty()) {
						oneTree = true;
						mod.hierarchicalTrees.add(mod.hierarchicalTree);
					}
					for (TreeNode root : mod.hierarchicalTrees) {
						for (TreeNode node : root.getInternalNodes()) {
							node.findRegulatorBayesStoch(numRegAssign, this.regulatorSet, betaMax);
							if (oneTree)
								mod.parents.add(node.regulationSplit.regulator);
						}
					}
					// set parents if multiple trees
					// default parameters ...
					if (!oneTree) {
						mod.setRegulatorWeights();
						mod.setRegulatorWeightsRandom();
					}
				}
			}
		}
		System.out.println("[ok]");
	}

	/**
	 * After assigning regulators, compile the list of actually used regulators in a module set
	 * and assign to each tree node a random regulator from this list.
	 *
	 * @deprecated
	 * @author tomic
	 */
	public void assignRandomRegulators() {
		System.out.println("\nAssigning random regulators ...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			ArrayList<Module> modset = this.moduleSets.get(i);
			System.out.println("... module set " + i + " (" + modset.size() + " modules)");
			// assign random regulators
			Random random = new Random();
			for (Module mod : modset) {
				for (TreeNode root : mod.hierarchicalTrees)
					for (TreeNode node : root.getInternalNodes()) {
						node.testSplitsRandom = new ArrayList<Split>(node.testSplits.size());
						for (int k = 0; k < node.testSplitsRandom.size(); k++) {
							Gene reg = this.regulatorSet.get(random.nextInt(this.regulatorSet.size()));
							reg.setNormData(data);
							reg.possibleSplitValues = new ArrayList<Double>();
							for (int m : node.leafDistribution.condSet)
								if (!Double.isNaN(reg.normData[m]))
									reg.possibleSplitValues.add(reg.normData[m]);
							double sv = reg.possibleSplitValues.get(random.nextInt(reg.possibleSplitValues.size()));
							Split splt = new Split(node, reg, sv);
							splt.computeBeta();
							splt.computeRegulatorScore();
							node.testSplitsRandom.add(splt);
						}
					}
				// default parameters ...
				mod.setParentWeightsRandomSum(0, 0.5);
			}

		}
	}

	/**
	 * Heuristic search algorithm to find a (local) maximum of the Bayesian score. 
	 * This algorithm alternates between finding an optimal partitioning of the 
	 * experiments in each module by hierarchical clustering given the current genes in the
	 * module, and reassigning genes to other modules given the current partitioning
	 * of experiments in each module until the Bayesian score can no longer be improved.
	 * 
	 * @see Module#hierarchicalClustering(List, boolean)}
	 * @param scoregain in the final regulation trees, keep only splits which improve
	 * the score by at least this value.
	 * @param useBHCscore use merge score from Bayesian Hierarchical Clustering or 
	 * simple score difference when hierarchically clustering experiments.
	 * @param epsConvergence stop searching for a better solution if the difference in
	 * scores between successive solutions becomes smaller than this value.
	 * @param scoreFiles store network scores during heuristic search to evaluate 
	 * convergence (optional argument).
	 * 
	 * @author tomic
	 */
	public void heuristicSearchMax(double scoregain, boolean useBHCscore, double epsConvergence, String... scoreFiles) {
		Integer numMoves = 0;
		int numMovesTot = 0;
		double fracMoves, currentScore;
		HashMap<Integer, HashSet<Gene>> genesets = new HashMap<Integer, HashSet<Gene>>();
		ArrayList<Integer> allConds = new ArrayList<Integer>(this.numCond);

		for (int m = 0; m < this.numCond; m++)
			allConds.add(m);

		try {
			PrintWriter pw;
			if (scoreFiles.length == 1) { // a score file is given
				File f = new File(scoreFiles[0]);
				FileWriter fw = new FileWriter(f);
				pw = new PrintWriter(fw);
			} else { // no score file given, print scores to stdout
				pw = new PrintWriter(System.out);
			}
			this.bayesianScore();
			pw.println(networkScore / numGenes + "\t" + 0);
			// compute initial statistics and score
			for (Module mod : moduleSet) {
				mod.hierarchicalTree = new TreeNode(mod, normalGammaPrior);
				mod.hierarchicalTree.leafDistribution.condSet = allConds;
				mod.hierarchicalTree.statistics();
				mod.moduleScore = mod.hierarchicalTree.bayesianScore();
				// initialize gene sets
				HashSet<Gene> set = new HashSet<Gene>();
				genesets.put(mod.number, set);
				this.bayesianScore();
				pw.println(networkScore / numGenes + "\t" + 0);
			}
			System.out.println("Initial network score per gene " + networkScore / numGenes);
			do {
				currentScore = networkScore;
				// compute hierarchical cluster trees
				System.out.println("Learning trees ...");
				int numRelearn = 0;
				for (Module mod : moduleSet) {
					// check if gene set has been changed
					if (!mod.genes.equals(genesets.get(mod.number))) {
						List<TreeNode> treeList = mod.initTreeList(allConds);
						TreeNode candidateRoot = mod.hierarchicalClustering(treeList, useBHCscore);
						candidateRoot.testScore(0.0);
						if (candidateRoot.bayesianScore() > mod.moduleScore) {
							mod.hierarchicalTree = candidateRoot;
						}
						mod.moduleScore = mod.hierarchicalTree.bayesianScore();
						this.bayesianScore();
						pw.println(networkScore / numGenes + "\t" + 0);
						// update copy of gene set
						HashSet<Gene> set = new HashSet<Gene>();
						for (Gene gene : mod.genes)
							set.add(gene);
						genesets.put(mod.number, set);
						mod.changed = true;
						numRelearn += 1;
					} else {
						mod.changed = false;
					}
				}
				System.out.println("... learned " + numRelearn + " trees.");
				System.out.println("new network score per gene " + networkScore / numGenes);
				System.out.println("Reassigning genes ...");
				numMovesTot = 0;
				//	reassign genes
				do {
					numMoves = moduleReassign(0.0);
					fracMoves = numMoves.doubleValue() / numGenes;
					numMovesTot += numMoves;
					pw.println(networkScore / numGenes + "\t" + fracMoves);
				} while (numMoves > 0);
				System.out.println("... made " + numMovesTot + " reassignments.");
				System.out.println("new network score per gene " + networkScore / numGenes);
			} while (Math.abs(networkScore - currentScore) / numGenes > epsConvergence);

			// cut trees with score cutoff requirement
			System.out.println("Final tree pruning ...");
			for (Module mod : moduleSet) {
				mod.hierarchicalTree.testScore(scoregain);
				mod.moduleScore = mod.hierarchicalTree.bayesianScore();
			}
			System.out.println("...done.");
			this.bayesianScore();
			pw.println(networkScore / numGenes + "\t" + 0);
			System.out.println("Final network score per gene " + networkScore / numGenes);
			pw.println(networkScore / numGenes + "\t" + 0);
			pw.close();
		} catch (IOException e) {
			System.out.println("IOException: " + e);
		}
	}

	/**
	 * Finds an ensemble of equally likely networks by Gibbs sampling different experiment 
	 * partitions. This method starts with optimal gene assignments to modules, such as
	 * for instance found by 
	 * {@link ModuleNetwork#heuristicSearchMax(double, boolean, double, String[])}.
	 * Then it builds for each module multiple regulation trees by Gibbs sampling different
	 * experiment partions using CRC 
	 * (see {@link Module#gibbsSamplerClustering(int, int, int, int)}).
	 * 
	 * @param numRuns number of Gibbs sampler runs for each module.
	 * @param numSteps number of steps in each run of the Gibbs sampler.
	 * @param burnIn number of intial steps before sampling starts.
	 * @param sampleStep number of steps between successive samples.
	 * @param scoregain in the final regulation trees, keep only splits which improve
	 * the score by at least this value.
	 * @param useBHCscore use merge score from Bayesian Hierarchical Clustering or 
	 * simple score difference when hierarchically clustering experiments.
	 * 
	 * @author anjos, tomic
	 */
	public void gibbsSamplerExpts(int numRuns, int numSteps, int burnIn, int sampleStep, double scoregain, boolean useBHCscore) {
		// generate a set of hierarchical trees for each module
		System.out.println("Gibbs sampling hierarchical trees...");
		// check for multiple module sets
		if (this.moduleSets.isEmpty())
			this.moduleSets.add(this.moduleSet);
		for (int i = 0; i < this.moduleSets.size(); i++) {
			//System.out.println("module set: " + i + " (" + this.moduleSets.get(i).size() + " modules)");
			for (Module mod : this.moduleSets.get(i)) {
				if (!mod.genes.isEmpty()) {
					mod.hierarchicalTrees = new ArrayList<TreeNode>();
					System.out.println("sampling module " + mod.number + " timestamp: " + new java.util.Date().toString());
					HashSet<ArrayList<ArrayList<Integer>>> clusterList = mod.gibbsSamplerClustering(numRuns, numSteps, burnIn, sampleStep);
					for (ArrayList<ArrayList<Integer>> cluster : clusterList) {
						mod.hierarchicalTree = mod.hierarchicalClusteringOrdered(mod.partition2TreeNode(cluster), useBHCscore);
						mod.hierarchicalTree.testScore(scoregain);
						mod.hierarchicalTrees.add(mod.hierarchicalTree);
					}
				}
			}
		}
		System.out.println("done.");
	}

	/**
	 * Finds an ensemble of equally likely networks by Gibbs sampling gene modules and 
	 * experiment partitions using CRC. It is ideally followed by 
	 * {@link ModuleNetwork#gibbsSamplerExpts(int, int, int, int, double, boolean)} to sample
	 * additional experiment partitions for the converged gene modules.
	 * 
	 * @param numRuns number of Gibbs sampler runs.
	 * @param numSteps number of steps in each run of the Gibbs sampler.
	 * @param burnIn number of intial steps before sampling starts.
	 * @param sampleStep number of steps between successive samples.
	 * @param scoregain in the final regulation trees, keep only splits which improve
	 * the score by at least this value.
	 * @param useBHCscore use merge score from Bayesian Hierarchical Clustering or 
	 * simple score difference when hierarchically clustering experiments.
	 * 
	 * @author anjos, tomic
	 *  
	 */
	public void gibbsSamplerGenes(int initNumClust, int numRuns, int numSteps, int burnIn, int sampleStep, double scoregain, boolean useBHCscore) {
		System.out.println("Gibbs sampling modules...");
		
		// set initial number of clusters to half the number of genes if no specific value was given
		if (initNumClust == 0)
			initNumClust = this.geneSet.size()/2;
		System.out.println("Initial number of clusters: "+initNumClust);
		
		// check if we need to cluster the full data set
		double[][] subData;
		if (this.geneSet.size() != this.data.length) {
			System.out.println(this.geneSet.size());
			subData = new double[this.geneSet.size()][this.data[0].length];
			for (int k = 0; k < this.geneSet.size(); k++)
				subData[k] = this.data[this.geneSet.get(k).number];
		} else
			subData = this.data;
		this.moduleSets = new ArrayList<ArrayList<Module>>();
		for (int n = 1; n <= numRuns; n++) {
			//System.out.println("... run " + n);
			GibbsSampler clust = new GibbsSampler(subData);
			clust.RandomAssignTwoway(initNumClust);
			clust.Cluster2way();
			for (int k = 1; k <= numSteps; k++) {
				System.out.println("step "+k+" timestamp: "+ new java.util.Date().toString());
				clust.Cluster2way();
				if (k > burnIn && (numSteps - k) % sampleStep == 0) {
					System.out.println("sampling...");
					int numModules = clust.num_cluster;
					//   add genes to modules from chineseCluster o/p
					ArrayList<Module> modules = new ArrayList<Module>(numModules);
					for (int i = 0; i < numModules; i++) {
						modules.add(new Module(this, i, this.normalGammaPrior));
						for (Integer row : clust.ClusterSet.get(i).RowSet)
							modules.get(i).genes.add(this.geneSet.get(row));
						// get experiment partitions into list
						ArrayList<ArrayList<Integer>> expClusters = new ArrayList<ArrayList<Integer>>();
						for (int m = 0; m < clust.ClusterSet.get(i).Columns.size(); m++) {
							ArrayList<Integer> expList = new ArrayList<Integer>();
							for (int in : clust.ClusterSet.get(i).Columns.get(m).ColumnSet)
								expList.add(in);
							expClusters.add(expList);
						}
						// convert experiment partitions to list of leaves
						List<TreeNode> leafList = modules.get(i).partition2TreeNode(expClusters);
						// hierarchically link leaves and store in tree
						modules.get(i).hierarchicalTree = modules.get(i).hierarchicalClusteringOrdered(leafList, useBHCscore);
						modules.get(i).hierarchicalTree.testScore(scoregain);
					}
					this.moduleSet = modules;
					this.moduleSets.add(modules);
				}
			}
		}
		System.out.println("[ok]");
	}

	/**
	 * Sets parameters of all testSplits of all nodes, needed after reading xml file.
	 * 
	 * @author tomic
	 */
	public void setTestSplits() {
		for (ArrayList<Module> modlist : this.moduleSets)
			for (Module mod : modlist) {
				for (TreeNode root : mod.hierarchicalTrees)
					for (TreeNode node : root.getInternalNodes()) {
						for (Split splt : node.testSplits) {
							splt.computeSign();
							splt.computeBeta();
							//splt.computeRegulatorScore();
						}
						for (Split splt : node.testSplitsRandom) {
							splt.computeSign();
							splt.computeBeta();
							//splt.computeRegulatorScore();
						}
					}
				mod.setRegulatorWeights();
				//mod.setRegulatorWeightsSum(0, 0);
				mod.setRegulatorWeightsRandom();
			}
	}

	/**
	 * Saves a module network to a file with name generated by
	 * current date, time, and a random number. Useful when submitting multiple
	 * jobs to a cluster
	 *
	 * @author tomic
	 */
	public void save() {
		Date today = new Date();
		DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.GERMAN);
		String output = "network_" + df.format(today) + "_";
		df = DateFormat.getTimeInstance(DateFormat.MEDIUM, Locale.GERMAN);
		// append random number to make sure file name is unique
		Random rand = new Random();
		output += df.format(today) + "_" + rand.nextDouble() + ".xml.gz";
		this.writeXML(output);
	}

	/**
	 * Writes the module network to XML format directly into a file. Specification of the XML document will be given
	 * elsewhere, when the time has come to do this.
	 * 
	 * @deprecated
	 * @param String Output file name
	 * 
	 * @author anjos, tomic, erbon
	 */
	public void writeXML(String xmlFile) {

		try {

			GZIPOutputStream gzos = new GZIPOutputStream(new FileOutputStream(xmlFile));
			PrintWriter out = new PrintWriter(gzos);

			out.write("<?xml version='1.0' encoding='iso-8859-1'?>\n\n");

			out.write("<ModuleNetwork>\n");

			//    		out.write("<Algorithm name=\"LeMoNe\">\n");
			//    		for (String par : this.parameters.keySet()){
			//    			out.write("<Parameter name=\"" + par +  "\" + value=\"" + this.parameters.get(par) + "\">\n");
			//    			out.write("</Parameter>\n");
			//    		}
			//    		out.write("</Algorithm>\n");

			out.write("<Experiments>");
			if (conditionSet != null) {
				for (Experiment e : conditionSet)
					out.write(e.name + "\t");
			}
			out.write("</Experiments>\n");
			
			out.write("<DataMatrix>\n");

			for (int i = 0; i < numGenes; i++) {
				out.write(geneSet.get(i).name + "\t");
				out.write(geneSet.get(i).ORFname + "\t");
				out.write(geneSet.get(i).description + "\t");
				for (int j = 0; j < numCond; j++)
					out.write(data[i][j] + "\t");
				out.write("\n");
			}

			out.write("</DataMatrix>\n");

			out.write("<DataType value=\"" + this.dataType.toString() + "\">\n");
			out.write("</DataType>\n");

			out.write("<Regulators>\n");

			for (Gene reg : this.regulatorSet) {
				out.write("<Regulator id=\"" + reg.number + "\" name=\"" + reg.name + "\" discrete=\"" + reg.getDiscrete() + "\">\n");
				out.write("</Regulator>\n");
			}
			out.write("</Regulators>\n");

			out.write("<NormalGammaPrior lambda0=\"" + this.normalGammaPrior[0] + "\" mu0=\"" + this.normalGammaPrior[1] + "\" alpha0=\"" + this.normalGammaPrior[2] + "\" beta0=\"" + this.normalGammaPrior[3] + "\">\n");
			out.write("</NormalGammaPrior>\n");

			// check for multiple module sets
			if (this.moduleSets.isEmpty())
				this.moduleSets.add(this.moduleSet);

			out.write("<NumModuleSets value=\"" + this.moduleSets.size() + "\">");
			out.write("</NumModuleSets>\n");

			for (ArrayList<Module> modset : this.moduleSets) {
				out.write("<ModuleSet>\n");

				out.write("<NumModules value=\"" + modset.size() + "\">");
				out.write("</NumModules>\n");

				for (int i = 0; i < modset.size(); i++) {
					Module mod = modset.get(i);
					out.write("<Module id=\"" + mod.number + "\">\n");
					out.write("<Genes>\n");
					for (Gene gene : mod.genes) {
						out.write("<Gene id=\"" + gene.number + "\" name=\"" + gene.name + "\">");
						out.write("</Gene>");
					}
					out.write("</Genes>\n");

					out.write("<RegulatorWeights>\n");
					for (Gene reg : mod.regulatorWeights.keySet()) {
						double w = mod.regulatorWeights.get(reg);
						out.write("<Regulator id=\"" + reg.number + "\" name=\"" + reg.name + "\" weight=\"" + w + "\">\n");
						out.write("</Regulator>\n");
					}
					out.write("</RegulatorWeights>\n");

					out.write("<RegulationTrees>\n");
					// one tree or multiple?
					if (mod.hierarchicalTrees.isEmpty()) {
						out.write("<Root>");
						StringBuffer sb2 = new StringBuffer();
						mod.hierarchicalTree.toXML(sb2);
						out.write(sb2.toString());
						out.write("</Root>\n");
					} else {
						for (TreeNode node : mod.hierarchicalTrees) {
							out.write("<Root>");
							StringBuffer sb2 = new StringBuffer();
							node.toXML(sb2);
							out.write(sb2.toString());
							out.write("</Root>\n");
						}
					}
					out.write("</RegulationTrees>\n");
					out.write("</Module>\n");
				}
				out.write("</ModuleSet>\n");
			}
			out.write("</ModuleNetwork>\n");
			out.close();
		} catch (IOException e) {
			System.out.println(e);
		}
	} // end writeXML


	/**
	 * Write regulation tree(s) to xml file.
	 * 
	 * @param xmlFile file name
	 * 
	 * @author eric
	 */
	public void writeRegTreeXML(String xmlFile) {
		
		System.out.print("Writing xml file "+xmlFile);
		try {

			GZIPOutputStream gzos = new GZIPOutputStream(new FileOutputStream(xmlFile));
			PrintWriter out = new PrintWriter(gzos);

			out.println("<?xml version='1.0' encoding='iso-8859-1'?>");
			out.println("<ModuleNetwork>");
			
			// here we assume that we have one moduleSet
			for (int i = 0; i < moduleSet.size(); i++) {
				Module mod = moduleSet.get(i);
				out.println("<Module id=\"" + mod.number + "\">");
				out.println("<RegulationTrees>");
				if (mod.hierarchicalTrees.isEmpty()) {
					out.println("<Root>");
					StringBuffer sb2 = new StringBuffer();
					mod.hierarchicalTree.toXML(sb2);
					out.println(sb2.toString());
					out.println("</Root>");
				} else {
					for (TreeNode node : mod.hierarchicalTrees) {
						out.println("<Root>");
						StringBuffer sb2 = new StringBuffer();
						node.toXML(sb2);
						out.println(sb2.toString());
						out.println("</Root>");
					}
				}
				out.println("</RegulationTrees>");
				out.println("</Module>");
			}
			out.println("</ModuleNetwork>");
			out.close();
			System.out.println(" [ok]");
		} catch (IOException e) {
			System.out.println("Error: an IO Exception occured.");
			System.out.println(e);
			System.exit(1);
		}
	}
	
	/**
	 * Read regulation trees from xml file.
	 * Data matrix and clusters should have been set before.
	 * 
	 * @param xmlFile
	 * @throws SAXParseException
	 * @throws IOException
	 * 
	 * @author eric
	 */
	public void readRegTreeXML(String xmlFile) {
		
		System.out.print("Reading XML file "+xmlFile);

		// Read from gzipped file
		StringBuffer buf = new StringBuffer();
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(xmlFile))));

			String line;
			while ((line = br.readLine()) != null) {
				buf.append(line);
			}
			br.close();
		}
		catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		}
		
		nu.xom.Document dom = XmlXomReader.getDocument(buf.toString());
		
		//get xml doc root
		nu.xom.Element root = dom.getRootElement();

		if (!root.getLocalName().equals("ModuleNetwork")) {
			//throw new SAXParseException("<ModuleNetwork> node not found", null);
			System.out.println("Error: root node <ModuleNetwork> not found in xml file.");
			System.exit(1);
		}

		// set normalized data and possible split values
		for (Gene g : this.regulatorSet) {
			g.setNormData(this.data);
			g.possibleSplitValues = new ArrayList<Double>();
			for (double x : this.data[g.number])
				g.possibleSplitValues.add(x);
		}

		// read modules
		Elements modules = root.getChildElements("Module");
		for (int j = 0; j < modules.size(); j++) {
			Element modElement = modules.get(j);
			int modnum = Integer.parseInt(modElement.getAttributeValue("id"));
			Module mod = moduleMap.get(modnum);
			// read regulation trees
			Elements roots = modElement.getFirstChildElement("RegulationTrees").getChildElements("Root");
			for (int l = 0; l < roots.size(); l++) {
				Element te = roots.get(l);
				// each "Root" has one child element that is the root TreeNode
				Element rootnode = te.getFirstChildElement("TreeNode");
				// there may be multiple trees for the same module
				// overwrite hierarchicalTree each time but also add to hierarchicalTrees
				TreeNode node = new TreeNode(mod, this.normalGammaPrior);
				node.fromXML(rootnode);
				for (TreeNode nd : node.getInternalNodes()) {
					for (Split splt : nd.testSplits) {
						splt.computeSign();
						splt.computeBeta();
					}
				}
				mod.hierarchicalTrees.add(node);
				mod.hierarchicalTree = node;
			}
		}
		System.out.println(" [ok]");
	}

	
	/**
	 * Creates a module network from an XML file. Specification of XML document
	 * will be given elsewhere.
	 * 
	 * @deprecated
	 * @param xmlFile name of XML file
	 * @throws SAXParseException
	 * @throws IOException
	 * 
	 * @author anjos, tomic, erbon
	 */
	public void fromXML(String xmlFile) throws SAXParseException, IOException {
		System.out.println("Reading XML file ...");

		// Read from gzipped file
		BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(xmlFile))));

		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		String line;
		while ((line = br.readLine()) != null) {
			pw.println(line);
		}
		br.close();
		String xml = sw.toString();
		
		// destroy this to decrease memory footprint
		sw = null;

		nu.xom.Document dom = XmlXomReader.getDocument(xml);
		
		// destroy this to decrease memory footprint
		xml = null;
		
		//get xml doc root
		nu.xom.Element root = dom.getRootElement();

		if (!root.getLocalName().equals("ModuleNetwork")) {
			throw new SAXParseException("<ModuleNetwork> node not found", null);
		}

		// read parameter settings
		this.parameters = new HashMap<String, String>();
		Element algo = root.getFirstChildElement("Algorithm");
		if (algo != null) {
			Elements params = algo.getChildElements("parameter");
			for (int i = 0; i < params.size(); i++) {
				String name = params.get(i).getAttributeValue("name");
				String value = params.get(i).getAttributeValue("value");
				this.parameters.put(name, value);
			}
		}
		
		// Read and set experiments names
		Element exp = root.getFirstChildElement("Experiments");
		if (exp != null) {
			String[] expline = exp.getValue().split("\\t");
			this.conditionSet = new ArrayList<Experiment>(expline.length);
			for (int i = 0; i < expline.length; i++) {
				this.conditionSet.add(new Experiment(expline[i], i));
			}
		}

		// read data matrix
		System.out.println("... data");
		String data = root.getFirstChildElement("DataMatrix").getValue();
		String[] data1 = data.split("\\n");
		String[] data2 = data1[1].split("\\t");
		int numGenes = data1.length - 1;
		this.numGenes = numGenes;
		int numCond = data2.length - 3;
		this.numCond = numCond;
		this.data = new double[numGenes][numCond];
		this.geneSet = new ArrayList<Gene>(numGenes);
		this.geneMap = new HashMap<String, Gene>();
		for (int i = 0; i < numGenes; i++) {
			data2 = data1[i + 1].split("\\t");
			this.geneSet.add(new Gene(data2[0], data2[1], i));
			this.geneMap.put(this.geneSet.get(i).name, this.geneSet.get(i));
			for (int j = 0; j < numCond; j++) {
				try {
					this.data[i][j] = Double.parseDouble(data2[j + 3]);
				} catch (InputMismatchException e) {
					this.data[i][j] = Double.NaN;
				}
			}
		}

		double nrDataPoints = 0;
		double sumSquare = 0.0;
		this.dataMean = 0.0;
		this.dataSigma = 0.0;
		this.dataMin = Double.POSITIVE_INFINITY;
		this.dataMax = Double.NEGATIVE_INFINITY;
		for (int i = 0; i < this.data.length; i++) {
			for (int j = 0; j < this.data[i].length; j++) {
				if (!Double.isNaN(this.data[i][j])) {
					this.dataMean += this.data[i][j];
					sumSquare += Math.pow(this.data[i][j], 2);
					nrDataPoints += 1;
					if (this.data[i][j] < this.dataMin)
						this.dataMin = this.data[i][j];
					else if (this.data[i][j] > this.dataMax)
						this.dataMax = this.data[i][j];
				}
			}
		}

		this.dataMean /= (double) nrDataPoints;
		this.dataSigma = Math.sqrt(sumSquare - nrDataPoints * Math.pow(this.dataMean, 2)) / Math.sqrt((double) nrDataPoints);

		Element datatype = root.getFirstChildElement("DataType");
		if (datatype != null) {
			String value = datatype.getAttributeValue("value");
			if (value.equals("RATIO"))
				this.dataType = dataType.RATIO;
			else if (value.equals("AFFY"))
				this.dataType = dataType.AFFY;
		} else { // set default to RATIO
			this.dataType = dataType.RATIO;
		}
		System.out.printf(this.dataType.toString() + " dataMean %f, dataSigma %f, dataMin %f, dataMax %f, numGenes %d\n", this.dataMean, this.dataSigma, this.dataMin, this.dataMax, this.numGenes);

		// read regulators
		System.out.println("... regulators");
		this.regulatorSet = new ArrayList<Gene>();
		Elements regulators = root.getFirstChildElement("Regulators").getChildElements();
		for (int i = 0; i < regulators.size(); i++) {
			int regnum = Integer.parseInt(regulators.get(i).getAttributeValue("id"));
			boolean flag = Boolean.parseBoolean(regulators.get(i).getAttributeValue("discrete"));
			regulatorSet.add(this.geneSet.get(regnum));
			this.geneSet.get(regnum).setDiscrete(flag);
		}

		// set normalized data and possible split values
		for (Gene g : this.regulatorSet) {
			g.setNormData(this.data);
			g.possibleSplitValues = new ArrayList<Double>();
			for (double x : this.data[g.number])
				g.possibleSplitValues.add(x);
		}

		// read normal-gamma prior
		Element prior = root.getFirstChildElement("NormalGammaPrior");
		this.normalGammaPrior = new double[4];
		this.normalGammaPrior[0] = Double.parseDouble(prior.getAttributeValue("lambda0"));
		this.normalGammaPrior[1] = Double.parseDouble(prior.getAttributeValue("mu0"));
		this.normalGammaPrior[2] = Double.parseDouble(prior.getAttributeValue("alpha0"));
		this.normalGammaPrior[3] = Double.parseDouble(prior.getAttributeValue("beta0"));

		// read module sets
		Elements modsets = root.getChildElements("ModuleSet");
		this.moduleSets = new ArrayList<ArrayList<Module>>(modsets.size());
		for (int i = 0; i < modsets.size(); i++) {
			System.out.println("... module set " + i);
			// read modules
			Elements modules = modsets.get(i).getChildElements("Module");
			// module set
			ArrayList<Module> modset = new ArrayList<Module>(modules.size());
			for (int j = 0; j < modules.size(); j++) {
				Element modElement = modules.get(j);
				int modnum = Integer.parseInt(modElement.getAttributeValue("id"));
				// initialize
				Module mod = new Module(this, modnum, this.normalGammaPrior);
				// read genes
				Elements genes = modElement.getFirstChildElement("Genes").getChildElements();
				for (int k = 0; k < genes.size(); k++) {
					Element ge = genes.get(k);
					int genenum = Integer.parseInt(ge.getAttributeValue("id"));
					mod.genes.add(this.geneSet.get(genenum));
				}
				// read regulator weights
				Element regW = modElement.getFirstChildElement("RegulatorWeights");
				if (regW != null) {
					Elements regs = regW.getChildElements();
					for (int k = 0; k < regs.size(); k++) {
						Element reg = regs.get(k);
						String regname = reg.getAttributeValue("name");
						double weight = Double.parseDouble(reg.getAttributeValue("weight"));
						mod.regulatorWeights.put(this.geneMap.get(regname), weight);
					}
				}
				// read regulation trees
				Elements roots = modElement.getFirstChildElement("RegulationTrees").getChildElements("Root");
				for (int l = 0; l < roots.size(); l++) {
					Element te = roots.get(l);
					// each "Root" has one child element that is the root TreeNode
					Element rootnode = te.getFirstChildElement("TreeNode");
					// there may be multiple trees for the same module
					// overwrite hierarchicalTree each time but also add to hierarchicalTrees
					TreeNode node = new TreeNode(mod, this.normalGammaPrior);
					node.fromXML(rootnode);
					for (TreeNode nd : node.getInternalNodes()) {
						for (Split splt : nd.testSplits) {
							splt.computeSign();
							splt.computeBeta();
						}
					}
					mod.hierarchicalTrees.add(node);
					mod.hierarchicalTree = node;
				}

				// default parameters ...
				int lev = 0;
				double fac = 1.0;
				if (this.parameters.containsKey("downWeightLevel") && this.parameters.containsKey("downWeightFactor")) {
					lev = Integer.parseInt(this.parameters.get("downWeightLevel"));
					fac = Double.parseDouble(this.parameters.get("downWeightFactor"));
				}
				//mod.setRegulatorWeightsMax(lev, fac);
				//mod.setRegulatorWeightsSum(lev, fac);
				//mod.setParentWeightsRandomSum(lev, fac);
				//mod.setParentSigns();
				// add to module set
				modset.add(mod);
			}
			// add to list
			this.moduleSets.add(modset);
			this.moduleSet = modset;
		}
		System.out.println("... done.");
	}

	

	
	/**
	 * Read the module sets from an XML file, skipping all other information. 
	 * Specification of XML document will be given elsewhere.
	 * 
	 * @deprecated
	 * @param xmlFile name of XML file
	 * @throws SAXParseException
	 * @throws IOException
	 * 
	 * @author anjos, tomic, erbon
	 */
	public void fromXMLModSets(String xmlFile) throws SAXParseException, IOException {
		System.out.println("Reading XML file ...");

		// Read from gzipped file
		BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(xmlFile))));

		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		String line;
		while ((line = br.readLine()) != null) {
			pw.println(line);
		}
		br.close();
		String xml = sw.toString();

		nu.xom.Document dom = XmlXomReader.getDocument(xml);
		nu.xom.Element root = dom.getRootElement();

		if (!root.getLocalName().equals("ModuleNetwork")) {
			throw new SAXParseException("<ModuleNetwork> node not found", null);
		}

		// read module sets
		Elements modsets = root.getChildElements("ModuleSet");
		int numsets = this.moduleSets.size();
		for (int i = 0; i < modsets.size(); i++) {
			int num = numsets + i;
			System.out.println("... module set " + num);
			// read modules
			Elements modules = modsets.get(i).getChildElements("Module");
			// module set
			ArrayList<Module> modset = new ArrayList<Module>(modules.size());
			for (int j = 0; j < modules.size(); j++) {
				Element modElement = modules.get(j);
				int modnum = Integer.parseInt(modElement.getAttributeValue("id"));
				// initialize
				Module mod = new Module(this, modnum, this.normalGammaPrior);
				// read genes
				Elements genes = modElement.getFirstChildElement("Genes").getChildElements();
				for (int k = 0; k < genes.size(); k++) {
					Element ge = genes.get(k);
					int genenum = Integer.parseInt(ge.getAttributeValue("id"));
					mod.genes.add(this.geneSet.get(genenum));
				}
				// read regulator weights
				Element regW = modElement.getFirstChildElement("RegulatorWeights");
				if (regW != null) {
					Elements regs = regW.getChildElements();
					for (int k = 0; k < regs.size(); k++) {
						Element reg = regs.get(k);
						String regname = reg.getAttributeValue("name");
						double weight = Double.parseDouble(reg.getAttributeValue("weight"));
						mod.regulatorWeights.put(this.geneMap.get(regname), weight);
					}
				}
				// read regulation trees
				Elements roots = modElement.getFirstChildElement("RegulationTrees").getChildElements("Root");
				for (int l = 0; l < roots.size(); l++) {
					Element te = roots.get(l);
					// each "Root" has one child element that is the root TreeNode
					Element rootnode = te.getFirstChildElement("TreeNode");
					// there may be multiple trees for the same module
					// overwrite hierarchicalTree each time but also add to hierarchicalTrees
					TreeNode node = new TreeNode(mod, this.normalGammaPrior);
					node.fromXML(rootnode);
					mod.hierarchicalTrees.add(node);
					mod.hierarchicalTree = node;
				}

				// default parameters ...
				//int lev=0;
				//double fac=1.0;
				//if (this.parameters.containsKey("downWeightLevel") && this.parameters.containsKey("downWeightFactor")){
				//	lev = Integer.parseInt(this.parameters.get("downWeightLevel"));
				//	fac = Double.parseDouble(this.parameters.get("downWeightFactor"));
				//}

				//mod.setRegulatorWeightsMax(lev, fac);
				//mod.setRegulatorWeightsSum(lev, fac);
				//mod.setParentWeightsRandomSum(lev, fac);
				//mod.setParentSigns();
				// add to module set
				modset.add(mod);
			}
			// add to list
			this.moduleSets.add(modset);
			this.moduleSet = modset;
		}
		System.out.println("... done.");
	}

	/**
	 * Read xml file and set only data + modules. To be used with fromXMLTrees function
	 * to get a complete module network object
	 * 
	 * @deprecated
	 * @param xmlFile
	 * 
	 * @author erbon
	 */
	public void fromXMLDataMod(String xmlFile) {
		System.out.println("Reading Data and Modules...");

		try {
			// Build xom Document directly from gzip input stream
			BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(xmlFile))));
			Builder parser = new Builder();
			Document dom = parser.build(br);
			
			//get xml doc root
			nu.xom.Element root = dom.getRootElement();
	
			if (!root.getLocalName().equals("ModuleNetwork")) {
				throw new SAXParseException("<ModuleNetwork> node not found", null);
			}
	
			// read parameter settings
			this.parameters = new HashMap<String, String>();
			Element algo = root.getFirstChildElement("Algorithm");
			if (algo != null) {
				Elements params = algo.getChildElements("parameter");
				for (int i = 0; i < params.size(); i++) {
					String name = params.get(i).getAttributeValue("name");
					String value = params.get(i).getAttributeValue("value");
					this.parameters.put(name, value);
				}
			}
			
			// Read and set experiments names
			Element exp = root.getFirstChildElement("Experiments");
			if (exp != null) {
				String[] expline = exp.getValue().split("\\t");
				this.conditionSet = new ArrayList<Experiment>(expline.length);
				for (int i = 0; i < expline.length; i++) {
					this.conditionSet.add(new Experiment(expline[i], i));
				}
			}
	
			// read data matrix; compute mean and standard deviation
			System.out.println("... data");
			String data = root.getFirstChildElement("DataMatrix").getValue();
			String[] data1 = data.split("\\n");
			String[] data2 = data1[1].split("\\t");
			int numGenes = data1.length - 1;
			this.numGenes = numGenes;
			int numCond = data2.length - 3;
			this.numCond = numCond;
			this.data = new double[numGenes][numCond];
			this.geneSet = new ArrayList<Gene>(numGenes);
			this.geneMap = new HashMap<String, Gene>();
			for (int i = 0; i < numGenes; i++) {
				data2 = data1[i + 1].split("\\t");
				this.geneSet.add(new Gene(data2[0], data2[1], i));
				this.geneMap.put(this.geneSet.get(i).name, this.geneSet.get(i));
				for (int j = 0; j < numCond; j++) {
					try {
						this.data[i][j] = Double.parseDouble(data2[j + 3]);
					} catch (InputMismatchException e) {
						this.data[i][j] = Double.NaN;
					}
				}
			}
	
			double nrDataPoints = 0;
			double sumSquare = 0.0;
			this.dataMean = 0.0;
			this.dataSigma = 0.0;
			this.dataMin = Double.POSITIVE_INFINITY;
			this.dataMax = Double.NEGATIVE_INFINITY;
			for (int i = 0; i < this.data.length; i++) {
				for (int j = 0; j < this.data[i].length; j++) {
					if (!Double.isNaN(this.data[i][j])) {
						this.dataMean += this.data[i][j];
						sumSquare += Math.pow(this.data[i][j], 2);
						nrDataPoints += 1;
						if (this.data[i][j] < this.dataMin)
							this.dataMin = this.data[i][j];
						else if (this.data[i][j] > this.dataMax)
							this.dataMax = this.data[i][j];
					}
				}
			}
	
			this.dataMean /= (double) nrDataPoints;
			this.dataSigma = Math.sqrt(sumSquare - nrDataPoints * Math.pow(this.dataMean, 2)) / Math.sqrt((double) nrDataPoints);
			
			// set data type
			Element datatype = root.getFirstChildElement("DataType");
			if (datatype != null) {
				String value = datatype.getAttributeValue("value");
				if (value.equals("RATIO"))
					this.dataType = dataType.RATIO;
				else if (value.equals("AFFY"))
					this.dataType = dataType.AFFY;
			} else { // set default to RATIO
				this.dataType = dataType.RATIO;
			}
			
			System.out.printf(this.dataType.toString() + " dataMean %f, dataSigma %f, dataMin %f, dataMax %f, numGenes %d\n", this.dataMean, this.dataSigma, this.dataMin, this.dataMax, this.numGenes);
	
			// read regulators
			System.out.println("... regulators");
			this.regulatorSet = new ArrayList<Gene>();
			Elements regulators = root.getFirstChildElement("Regulators").getChildElements();
			for (int i = 0; i < regulators.size(); i++) {
				int regnum = Integer.parseInt(regulators.get(i).getAttributeValue("id"));
				boolean flag = Boolean.parseBoolean(regulators.get(i).getAttributeValue("discrete"));
				regulatorSet.add(this.geneSet.get(regnum));
				this.geneSet.get(regnum).setDiscrete(flag);
			}
	
			// set normalized data and possible split values
			for (Gene g : this.regulatorSet) {
				g.setNormData(this.data);
				g.possibleSplitValues = new ArrayList<Double>();
				for (double x : this.data[g.number])
					g.possibleSplitValues.add(x);
			}
	
			// read normal-gamma prior
			Element prior = root.getFirstChildElement("NormalGammaPrior");
			this.normalGammaPrior = new double[4];
			this.normalGammaPrior[0] = Double.parseDouble(prior.getAttributeValue("lambda0"));
			this.normalGammaPrior[1] = Double.parseDouble(prior.getAttributeValue("mu0"));
			this.normalGammaPrior[2] = Double.parseDouble(prior.getAttributeValue("alpha0"));
			this.normalGammaPrior[3] = Double.parseDouble(prior.getAttributeValue("beta0"));
	
			// read module sets
			Elements modsets = root.getChildElements("ModuleSet");
			this.moduleSets = new ArrayList<ArrayList<Module>>(modsets.size());
			for (int i = 0; i < modsets.size(); i++) {
				System.out.println("... module set " + i);
				// read modules
				Elements modules = modsets.get(i).getChildElements("Module");
				// module set
				ArrayList<Module> modset = new ArrayList<Module>(modules.size());
				for (int j = 0; j < modules.size(); j++) {
					Element modElement = modules.get(j);
					int modnum = Integer.parseInt(modElement.getAttributeValue("id"));
					// initialize
					Module mod = new Module(this, modnum, this.normalGammaPrior);
					// read genes
					Elements genes = modElement.getFirstChildElement("Genes").getChildElements();
					for (int k = 0; k < genes.size(); k++) {
						Element ge = genes.get(k);
						int genenum = Integer.parseInt(ge.getAttributeValue("id"));
						mod.genes.add(this.geneSet.get(genenum));
					}
					// read regulator weights
//					Element regW = modElement.getFirstChildElement("RegulatorWeights");
//					if (regW != null) {
//						Elements regs = regW.getChildElements();
//						for (int k = 0; k < regs.size(); k++) {
//							Element reg = regs.get(k);
//							String regname = reg.getAttributeValue("name");
//							double weight = Double.parseDouble(reg.getAttributeValue("weight"));
//							mod.regulatorWeights.put(this.geneMap.get(regname), weight);
//						}
//					}
					// do NOT read regulation trees
					//Elements roots = modElement.getFirstChildElement("RegulationTrees").getChildElements("Root");
					//for (int l = 0; l < roots.size(); l++) {
					//	Element te = roots.get(l);
						// each "Root" has one child element that is the root TreeNode
					//	Element rootnode = te.getFirstChildElement("TreeNode");
						// there may be multiple trees for the same module
						// overwrite hierarchicalTree each time but also add to hierarchicalTrees
					//	TreeNode node = new TreeNode(mod, this.normalGammaPrior);
					//	node.fromXML(rootnode);
					//	for (TreeNode nd : node.getInternalNodes()) {
					//		for (Split splt : nd.testSplits) {
					//			splt.computeSign();
					//			splt.computeBeta();
					//		}
					//	}
					//	mod.hierarchicalTrees.add(node);
					//	mod.hierarchicalTree = node;
          //}
	
					// default parameters ...
//					int lev = 0;
//					double fac = 1.0;
//					if (this.parameters.containsKey("downWeightLevel") && this.parameters.containsKey("downWeightFactor")) {
//						lev = Integer.parseInt(this.parameters.get("downWeightLevel"));
//						fac = Double.parseDouble(this.parameters.get("downWeightFactor"));
//					}
					// add the module to the list
					modset.add(mod);
				}
				// add module list to the module sets
				this.moduleSets.add(modset);
				this.moduleSet = modset;
			}
			System.out.println("Done.");
		}
		catch (IOException ex) {
			System.out.println("An IO Exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (SAXParseException ex) {
			System.out.println("A SAX exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (ValidityException ex) {
			System.out.println("A validity exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (ParsingException ex) {
			System.out.println("A parsing exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		
	}

	/**
	 * Read xml file and set hierarchical trees. Set only modules with non-empty trees, to be used 
	 * with splitted regulator assignment files. 
	 * 
	 * @deprecated
	 * @param xmlFile xml file name
	 * @param id2mod a module id to module object hashmap.
	 * 
	 * @author erbon
	 */
	public void fromXMLTrees (String xmlFile, HashMap<Integer,Module> id2mod) {
		System.out.println("Reading trees from "+xmlFile);
		
		try {
			// Build xom Document directly from gzip input stream
			BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(xmlFile))));
			Builder parser = new Builder();
			Document dom = parser.build(br);

			//get xml doc root
			nu.xom.Element root = dom.getRootElement();

			if (!root.getLocalName().equals("ModuleNetwork")) {
				throw new SAXParseException("<ModuleNetwork> node not found", null);
			}
			Elements modsets = root.getChildElements("ModuleSet");
			
			// read modules, we assume here that we have only ONE moduleset
			Elements modules = modsets.get(0).getChildElements("Module");
			
			// module set
			//ArrayList<Module> modset = new ArrayList<Module>(modules.size());
			for (int j = 0; j < modules.size(); j++) {
				
				Element modElement = modules.get(j);
				int modnum = Integer.parseInt(modElement.getAttributeValue("id"));
				//System.out.println("module: " + modnum);
				// read regulation trees
				Elements roots = modElement.getFirstChildElement("RegulationTrees").getChildElements("Root");
				for (int l = 0; l < roots.size(); l++) {
					Element te = roots.get(l);
					// each "Root" has one child element that is the root TreeNode
					Element rootnode = te.getFirstChildElement("TreeNode");
					//System.out.println(rootnode.toXML());
					// there may be multiple trees for the same module
					// overwrite hierarchicalTree each time but also add to hierarchicalTrees
					TreeNode node = new TreeNode(id2mod.get(modnum), this.normalGammaPrior);
					node.fromXML(rootnode);
					//System.out.println(node.getInternalNodes().size());
					//int numnodes = node.getInternalNodes().size();
					int numreg=0;
						
					for (TreeNode nd : node.getInternalNodes()) {
						for (Split splt : nd.testSplits) {
							numreg++;
						}
					}
					
					/* Only a couple of module were assigned regulators in this file,
					 * so select only the trees with assigned regulators, else destroy the node object.
					 */
					if (numreg > 0) {
						Module mod = id2mod.get(modnum);
						mod.hierarchicalTrees.add(node);
						mod.hierarchicalTree = node;
						//System.out.println("module: " + modnum + " tree: " + l + " numreg " + numreg);
					}
					else {
						node = null;
					}
				}
			}
			br.close();
		}
		catch (IOException ex) {
			System.out.println("An IO Exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (SAXParseException ex) {
			System.out.println("A SAX exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (ValidityException ex) {
			System.out.println("A validity exception occured");
			ex.printStackTrace();
			System.exit(1);
		}
		catch (ParsingException ex) {
			System.out.println("A parsing exception occured");
			ex.printStackTrace();
			System.exit(1);
		}

	}
	

	/**
	 * Reassigns genes to other modules based on the current partitioning of the 
	 * experiments in each module. All single gene moves which improve the Bayesian
	 * score and preserve acyclicity are made. We only need to check moves to and from
	 * modules whose tree has changed in the previous step (stored in
	 * {@link Module#changed} field)
	 * 
	 * @param cutoff only consider moves with score improvement higher than cutoff.
	 * @return number of moves made.
	 * @author erbon, tomic
	 */
	public int moduleReassignTopDown(double cutoff) {
		System.out.println("Reassigning genes ...");
		// count number of moves
		int numMoves = 0;
		for (Gene gene : geneSet) {
			// loop over all modules
			for (Module module : moduleSet) {
				// current module for gene
				Module current_module = moduleSet.get(inModule.get(gene.name));
				// Module current_module = gene.module;
				if (module.changed | current_module.changed) {
					// reaffect gene to new module
					if (!module.equals(current_module)) {
						// test if resulting moduleGraph will be acyclic
						if (!moduleGraph.testCycle(gene, current_module, module)) {
							// current score (relevant part)
							double current_score = current_module.moduleScore + module.moduleScore;
							// for each leaf of the trees of current_module and
							// module: make a copy of statistics
							current_module.hierarchicalTree.copyStatisticsTmp();
							module.hierarchicalTree.copyStatisticsTmp();
							// compute statistics and score for hypothetical move,
							current_module.hierarchicalTree.statisticsRemoveGeneTmp(gene);
							module.hierarchicalTree.statisticsAddGeneTmp(gene);
							double new_score_current = current_module.hierarchicalTree.bayesianScoreTmp();
							double new_score_m = module.hierarchicalTree.bayesianScoreTmp();
							// make move only if the score is improved
							if ((new_score_current + new_score_m - current_score) / numGenes > cutoff) {
								numMoves++;
								moduleGraph.removeGene(gene);
								moduleGraph.addGene(gene, module);
								inModule.put(gene.name, module.number);
								// gene.module = module;
								module.genes.add(gene);
								current_module.genes.remove(gene);
								// copy temporary statistics
								current_module.hierarchicalTree.copyStatistics();
								module.hierarchicalTree.copyStatistics();

								// update network score (before updating module scores)
								networkScore = networkScore - current_module.moduleScore - module.moduleScore + new_score_current + new_score_m;

								// set module scores
								current_module.moduleScore = new_score_current;
								module.moduleScore = new_score_m;
							}
						}
					}
				}
			}
		}

		System.out.println("... made " + numMoves + " reassignments.");
		System.out.println("new network score per gene " + networkScore / numGenes + "\n");
		return numMoves;
	}

	/**
	 * Learns new regulation trees from the top down for all modules using
	 * exact splits on the candidate regulators which preserve acyclicity.
	 * 
	 * @param maxParents maximum number of parents per module
	 * @param pw writer for printing scores (to file or stdout)
	 * 
	 * @author tomic
	 */
	public void LearnStructureNew(int maxParents, PrintWriter pw) {
		double max;
		int maxloc;
		int numModules = this.moduleSet.size();
		boolean[] foundSplit = new boolean[numModules];
		boolean canSplit;
		DoubleArrayList bestSplitScores = new DoubleArrayList(numModules);
		for (int i = 0; i < numModules; i++)
			bestSplitScores.add(0.0);

		System.out.println("Learning new structure ...");
		// set module assignment map
		this.setInModule();
		// reset module graph
		this.moduleGraph = new ModuleGraph(this);
		// reset trees for each module and find the best split
		System.out.println("Resetting trees and initializing best splits ...");
		for (Module mod : moduleSet) {
			// reset parents list
			mod.parents = new ArrayList<Gene>();
			// reset tree and recompute statistics
			mod.hierarchicalTree = new TreeNode(mod, this.normalGammaPrior);
			for (int m = 0; m < numCond; m++)
				mod.hierarchicalTree.leafDistribution.condSet.add(m);
			mod.hierarchicalTree.statistics();
			// update score value
			mod.moduleScore = mod.hierarchicalTree.bayesianScore();
			System.out.println(mod.moduleScore);
			// find best split
			foundSplit[mod.number] = mod.bestSplitTopDown(maxParents);
			bestSplitScores.set(mod.number, mod.bestSplit.scoreGain);
		}
		// update ModuleNetwork score
		bayesianScore();
		System.out.println("... done");

		do {
			// find the module with the heighest split score
			max = Descriptive.max(bestSplitScores);
			// find first occurence of max
			maxloc = bestSplitScores.indexOfFromTo(max, 0, numModules - 1);
			System.out.println("Best split is in module " + maxloc);
			// make the split if it improves the score
			if (max > 0.0) {
				// add regulator to parent set and update moduleGraph
				moduleSet.get(maxloc).parents.add(moduleSet.get(maxloc).bestSplit.regulator);
				moduleGraph.addRegulator(moduleSet.get(maxloc).bestSplit.regulator, moduleSet.get(maxloc));
				// perform split
				moduleSet.get(maxloc).bestSplit.node.split(moduleSet.get(maxloc).bestSplit.regulator, moduleSet.get(maxloc).bestSplit.splitValue);
				// update module score : subtract old leaf score and add 2 new
				// scores
				moduleSet.get(maxloc).moduleScore = moduleSet.get(maxloc).moduleScore - moduleSet.get(maxloc).bestSplit.node.leafDistribution.score + moduleSet.get(maxloc).bestSplit.node.leftChild.leafDistribution.score
						+ moduleSet.get(maxloc).bestSplit.node.rightChild.leafDistribution.score;
				// find new best split in splitted module
				foundSplit[maxloc] = moduleSet.get(maxloc).bestSplitTopDown(maxParents);
				bestSplitScores.set(maxloc, moduleSet.get(maxloc).bestSplit.scoreGain);
				// test if the other best splits are still valid, if so, they
				// are still the best for their module
				for (Module mod : moduleSet) {
					if (mod.number != maxloc) {
						if (foundSplit[mod.number] && moduleGraph.testCycle(mod.bestSplit.regulator, mod)) {
							// cyclic, so we need to find a new best split
							foundSplit[mod.number] = mod.bestSplitTopDown(maxParents);
							bestSplitScores.set(mod.number, mod.bestSplit.scoreGain);
						}
					}
				}
			} else {
				foundSplit[maxloc] = false;
			}
			canSplit = foundSplit[0];
			int i = 1;
			while (!canSplit && i < numModules) {
				canSplit = canSplit | foundSplit[i];
				i++;
			}
			// update ModuleNetwork score
			bayesianScore();
			pw.println(networkScore / numGenes + "\t" + 0);
		} while (canSplit);
		System.out.println("New network score per gene " + networkScore / numGenes + "\n");
	}

	/**
	 * Updates regulation trees from the top down for all modules using
	 * exact splits on candidate regulators which preserve acyclicity.
	 * 
	 * @param maxParents maximum number of parents per module
	 * @param pw writer for printing scores (to file or stdout)
	 * 
	 * @author tomic
	 */
	public void LearnStructureUpdate(int maxParents, PrintWriter pw) {
		System.out.println("Update structure ...");
		int numModules = this.moduleSet.size();
		// start by resetting the bestSplits
		DoubleArrayList bestSplitScores = new DoubleArrayList(numModules);
		for (int i = 0; i < numModules; i++)
			bestSplitScores.add(0.0);
		for (Module mod : moduleSet)
			mod.bestSplit = new Split();

		/* for each module we prune the tree by checking if a split is
		   still the best split of the leaves below it; if yes, we
		   keep it, if not we delete the split and all internal nodes
		   below it; we prune the modules with the lowest score (per
		   gene in the module) first (to release regulators that can
		   improve the good modules even more.) */
		DoubleArrayList moduleScores = new DoubleArrayList(numModules);
		for (Module mod : moduleSet)
			moduleScores.add(mod.moduleScore / mod.genes.size());
		double moduleMin = Descriptive.min(moduleScores);
		int minloc;
		while (moduleMin < 0.0) {
			// current module with lowest score
			minloc = moduleScores.indexOfFromTo(moduleMin, 0, numModules - 1);
			System.out.println("Trimming tree for module " + minloc + " (score = " + moduleScores.get(minloc) + ")");
			moduleSet.get(minloc).treeTrim(moduleSet.get(minloc).hierarchicalTree);
			bestSplitScores.set(minloc, moduleSet.get(minloc).bestSplit.scoreGain);
			// set moduleScores to 0 so module will not be selected again
			moduleScores.set(minloc, 0.0);
			// set new minimum
			moduleMin = Descriptive.min(moduleScores);
		}

		// all trees are trimmed and the bestSplits arrays are updated: get
		// ready for regrowing trees
		double max;
		int maxloc;
		boolean[] foundSplit = new boolean[numModules];
		boolean canSplit;
		int changedModules = 0;
		for (Module mod : moduleSet) {
			// did we find a new best split?
			if (bestSplitScores.get(mod.number) > 0.0) {
				foundSplit[mod.number] = true;
				mod.changed = true;
				changedModules++;
			} else {
				foundSplit[mod.number] = false;
				mod.changed = false;
			}
			// compute scores
			mod.moduleScore = mod.hierarchicalTree.bayesianScore();
		}
		// update ModuleNetwork score
		bayesianScore();
		System.out.println("Network score per gene after trimming " + networkScore / numGenes + "\n");

		// regrow trees like in the LearnStructureNew method
		do {
			// find the module with the heighest split score
			max = Descriptive.max(bestSplitScores);
			// find first occurence of max
			maxloc = bestSplitScores.indexOfFromTo(max, 0, numModules - 1);
			System.out.println("Best split is in module " + maxloc + " (score gain = " + max / numGenes + ")");
			// make the split if it improves the score
			if (max > 0.0) {
				// add to parent set and update moduleGraph
				moduleSet.get(maxloc).parents.add(moduleSet.get(maxloc).bestSplit.regulator);
				moduleGraph.addRegulator(moduleSet.get(maxloc).bestSplit.regulator, moduleSet.get(maxloc));
				// perform split
				moduleSet.get(maxloc).bestSplit.node.split(moduleSet.get(maxloc).bestSplit.regulator, moduleSet.get(maxloc).bestSplit.splitValue);
				// update module score : subtract old score and add 2 new scores
				moduleSet.get(maxloc).moduleScore = moduleSet.get(maxloc).moduleScore - moduleSet.get(maxloc).bestSplit.node.leafDistribution.score + moduleSet.get(maxloc).bestSplit.node.leftChild.leafDistribution.score
						+ moduleSet.get(maxloc).bestSplit.node.rightChild.leafDistribution.score;
				// find new best split in splitted module
				foundSplit[maxloc] = moduleSet.get(maxloc).bestSplitTopDown(maxParents);
				bestSplitScores.set(maxloc, moduleSet.get(maxloc).bestSplit.scoreGain);
				// test if the other best splits are still valid, if so, they
				// are still the best for their module
				for (Module mod : moduleSet) {
					if (mod.number != maxloc) {
						if (foundSplit[mod.number] && moduleGraph.testCycle(mod.bestSplit.regulator, mod)) {
							// cyclic, so we need to find a new best split
							foundSplit[mod.number] = mod.bestSplitTopDown(maxParents);
							bestSplitScores.set(mod.number, mod.bestSplit.scoreGain);
						}
					}
				}
			} else {
				foundSplit[maxloc] = false;
			}
			canSplit = foundSplit[0];
			int i = 1;
			while (!canSplit && i < numModules) {
				canSplit = canSplit | foundSplit[i];
				i++;
			}
			// update ModuleNetwork score
			bayesianScore();
			pw.println(networkScore / numGenes + "\t" + 0);
		} while (canSplit);
		// update ModuleNetwork score
		bayesianScore();
		pw.println(networkScore / numGenes + "\t" + 0);
		System.out.println(changedModules + " modules have new tree");
		System.out.println("New network score per gene " + networkScore / numGenes + "\n");
	}

	/**
	 * Heuristic search algorithm to find a (local) maximum of the Bayesian score. 
	 * This algorithm alternates between finding an optimal partitioning of the 
	 * experiments in each module by finding from the top down the best exact splits
	 * with the given list of candidate regulators, and reassigning genes to other 
	 * modules given the current partitioning of experiments in each module, until 
	 * the Bayesian score can no longer be improved.
	 *
	 * @param maxParents maximal number of parents for each module.
	 * @param epsConvergence stop searching for a better solution if the difference in
	 * scores between successive solutions becomes smaller than this value.
	 * @param scoreFiles store network scores during heuristic search to evaluate 
	 * convergence (optional argument).
	 * @author tomic
	 * 
	 */
	public void heuristicSearchMaxTopDown(int maxParents, Double epsConvergence, String... scoreFiles) {
		Integer numMoves;
		double fracMoves, currentScore;
		try {
			PrintWriter pw;
			if (scoreFiles.length == 1) { // a score file is given
				File f = new File(scoreFiles[0]);
				FileWriter fw = new FileWriter(f);
				pw = new PrintWriter(fw);
			} else { // no score file given, print scores to stdout
				pw = new PrintWriter(System.out);
			}
			pw.println(networkScore / numGenes + "\t" + 0);
			// start by learning a new structure
			LearnStructureNew(maxParents, pw);
			do {
				currentScore = networkScore;
				do { // reassign genes
					numMoves = moduleReassignTopDown(0.0);
					fracMoves = numMoves.doubleValue() / numGenes;
					pw.println(networkScore / numGenes + "\t" + fracMoves);
				} while (numMoves > 0);
				// update structure
				LearnStructureUpdate(maxParents, pw);
			} while ((networkScore - currentScore) / numGenes > epsConvergence);
			//pw.close();
		} catch (IOException e) {
			System.out.println("IOException: " + e);
		}
	}
	
	/**
	 * Set top regulators for each module.
	 * @param filename Tab delimited file <reg, module_number>
	 * 
	 * @author erbon
	 */
	public void setTopRegulators(String filename) {
		try {
			BufferedReader input = new BufferedReader(new FileReader(new File(filename)));
			String line;
			while((line = input.readLine()) != null) {
				line.trim();
				String[] tk = line.split("\\t");
				String gene_name = tk[0];
				int module_number = Integer.parseInt(tk[1]);
		
				this.moduleSet.get(module_number).topRegulators.add(this.geneMap.get(gene_name));
			}
			input.close();
		}
		catch (IOException e) {
			System.out.println("Error while loading top regulators file.");
			e.printStackTrace();
			System.exit(1);
		}
	}

	/**
	 * Set top regulators classes.
	 * 
	 * @param filename Tab delimited file <reg, class, module_numbe, scorer>
	 * @param list of regulator classes
	 * 
	 * @author eric
	 */
	public void setTopRegulatorClasses(String topRegFiles) {
		
		System.out.print("Reading top regulators classes... ");
		ArrayList<String> fileList = new ArrayList<String>();
		try {
			// load list of regulator file names
			BufferedReader input = new BufferedReader(new FileReader(new File(topRegFiles)));
			String line;
			while((line = input.readLine()) != null) {
				if (!line.startsWith("#") && line.length()>1) {
					line.trim();
					fileList.add(line);
				}
			}
			input.close();
		}
		catch (Exception e) {
			System.out.println("Error while loading top regulator classes file.");
			e.printStackTrace();
			System.exit(1);
		}
		
		// initialize top regulator classes in each module
		for (Module mod : this.moduleSet) {
			 for (int i=0;i<fileList.size();i++)
				 mod.topRegClasses.add(new ArrayList<Regulator>());
		}

		// now read regulator files
		try {
			int ct = 0;
			for (String fn : fileList) {
				
				BufferedReader input = new BufferedReader(new FileReader(new File(fn)));
				String line;
				while((line = input.readLine()) != null) {
					line.trim();
					String[] tk = line.split("\\t");
					String gene_name = tk[0];
					int module_number = Integer.parseInt(tk[1]);
					double score = Double.parseDouble(tk[2]);
					
					Regulator reg = new Regulator(this.geneMap.get(gene_name), score);
					this.moduleSet.get(module_number).topRegClasses.get(ct).add(reg);
				}
				input.close();
				ct++;
			}
			
			System.out.println(" [ok]");
		}
		catch (Exception e) {
			e.printStackTrace();
			System.exit(1);
		}
	}

	
	/**
	 * Calculate mean and standard deviation for each module.
	 * 
	 * @author erbon
	 */
	public void setModuleMeanSigma() {
		int nbval=0;
		for (Module mod : this.moduleSet) {
			for (Gene g : mod.genes) {
				for (int j=0; j < this.data[g.number].length; j++) {
					if (!Double.isNaN(this.data[g.number][j])) {
						mod.mean += this.data[g.number][j];
						nbval++;
					}
				}
			}
			mod.mean = mod.mean / nbval;
			nbval=0;
		}
		
		nbval=0;
		for (Module mod : this.moduleSet) {
			for (Gene g : mod.genes) {
				for (int j=0; j < this.data[g.number].length; j++) {
					if (!Double.isNaN(this.data[g.number][j])) {
						mod.sigma += Math.pow(mod.mean - this.data[g.number][j],2);
						nbval++;
					}
				}
			}
			mod.sigma = Math.sqrt(mod.sigma / nbval);
			nbval=0;
		}
	}
	
	/**
	 * Calculate mean and standard deviation for each regulator.
	 * 
	 * @author erbon
	 */
	public void setRegulatorMeanSigma() {
		
		for (Gene g : this.regulatorSet) {
			int ct=0;
			for (int c=0; c < this.data[g.number].length; c++) {
				if (!Double.isNaN(this.data[g.number][c])) {
					g.mean += this.data[g.number][c];
					ct++;
				}
			}
			g.mean = g.mean / ct;
		}
		
		for (Gene g : this.regulatorSet) {
			int ct=0;
			for (int c=0; c < this.data[g.number].length; c++) {
				if (!Double.isNaN(this.data[g.number][c])) {
					g.sigma += Math.pow(g.mean - this.data[g.number][c],2);
					ct++;
				}
			}
			g.sigma = Math.sqrt(g.sigma / ct);
		}
	}
	
	/**
	 * Check that experiment's names are defined. If not, just add experiments as numbers.
	 * 
	 * @author erbon
	 */
	public void checkExperiments() {
		if (this.conditionSet == null) {
			this.conditionSet = new ArrayList<Experiment>(this.numCond);
			for (int i=0; i < this.numCond; i++) {
				Experiment e = new Experiment(Integer.toString(i), i);
				this.conditionSet.add(i, e);
			}
		}
	}
	
	/**
	 * Set boolean property useGlobalMeanForFigures 
	 * 
	 * @author erbon
	 */
	public void setGlobalMeanForFigures(boolean b) {
		this.useGlobalMeanForFigures = b;
	}
	
	/**
	 * Get boolean property useGlobalMeanForFigures
	 * 
	 * @author erbon
	 */
	public boolean getGlobalMeanForFigures() {
		return this.useGlobalMeanForFigures;
	}
	
	/**
	 * Set boolean property useRegulatorMeanForFigures
	 * 
	 *  @author erbon
	 */
	public void setRegulatorlMeanForFigures(boolean b) {
		this.useRegulatorMeanForFigures = b;
	}
	
	/**
	 * Get boolean property useRegulatorMeanForFigures
	 * 
	 * @author erbon
	 */
	public boolean getRegulatorMeanForFigures() {
		return this.useRegulatorMeanForFigures;
	}
	
	/**
	 * Define top 1% regulator score
	 * 
	 * @author erbon
	 */
	public double getTopRegulatorsScoreCutoff () {
		double ret = 0;
		ArrayList<Double> scores = new ArrayList<Double>();
		// add all regulator scores to a list
		for (Module mod : this.moduleSet) {
			if (!mod.genes.isEmpty()) {
                for (Gene reg : mod.regulatorWeights.keySet()) {
                	scores.add(mod.regulatorWeights.get(reg));
                }
			}
		}
		// Sort the list in reverse order
		Collections.sort(scores,Collections.reverseOrder());
		// Define the 1% position in the list
		int index = (int) Math.round(scores.size() * 0.01);
		// get the score corresponding to the 1% cutoff
		ret = scores.get(index);
		return ret;
	}
	
	/**
	 * Print regulators to a file.
	 * 
	 * @param filename: output file name
	 * @param all_regulators: print all regulators or only top 1%.
	 * @param matlab: module numbers start at 1 or 0.
	 * 
	 * @author erbon
	 */
	public void printRegulators(String filename, boolean all_regulators, boolean matlab) {
		
		// cutoff score value
		double cutoff = 0;
		
		//matlab, strangely enough, start numbering at 1
		int inc = 0;
		if (matlab == true)
			inc = 1;
		
		if (all_regulators == false)
			cutoff = this.getTopRegulatorsScoreCutoff();
		
		try{
            File output = new File(filename);
            PrintWriter pw = new PrintWriter(output);
            System.out.print("Printing regulator file "+filename);
            for (Module mod : this.moduleSet){
                if (!mod.genes.isEmpty()) {
                	ArrayList<RegData> reg_list = new ArrayList<RegData>();
                    for (Gene reg : mod.regulatorWeights.keySet()) {
                    	RegData r = new RegData(mod.regulatorWeights.get(reg), reg.name, reg.discrete);
                    	reg_list.add(r);
                    }
                    // Sort regulators according to their decreasing score
                    Collections.sort(reg_list, Collections.reverseOrder());
                    for (RegData r : reg_list) {
                    	int n = mod.number + inc;
                    	//String line = r.name + "\t" + r.type + "\t" + n + "\t" + String.format("%10.6f", r.weight);
                    	String line = r.name + "\t" + n + "\t" + String.format("%10.6f", r.weight);
                    	if (all_regulators == true)
                    		pw.println(line);
                    	else if (r.weight > cutoff)
                    		pw.println(line);
                    }
                }
            }
            pw.close();
            System.out.println(" [ok]");
        } catch (IOException e){
        	System.out.println("Error while trying to create the output file");
            System.out.println(e);
            e.printStackTrace();
        }
	}

	/**
	 * Print random regulators to a file.
	 * 
	 * @param filename: output file name
	 * @param matlab: module numbers start at 1 or 0.
	 * 
	 * @author erbon
	 */
	public void printRandomRegulators (String filename, boolean matlab) {
				
		//matlab, strangely enough, start numbering at 1
		int inc = 0;
		if (matlab == true)
			inc = 1;
				
		try{
            File output = new File(filename);
            PrintWriter pw = new PrintWriter(output);
            System.out.print("Printing random regulator file "+filename);
            for (Module mod : this.moduleSet){
                if (!mod.genes.isEmpty()) {
                	ArrayList<RegData> reg_list = new ArrayList<RegData>();
                    for (Gene reg : mod.regulatorWeightsRandom.keySet()) {
                    	RegData r = new RegData(mod.regulatorWeightsRandom.get(reg), reg.name, reg.discrete);
                    	reg_list.add(r);
                    }
                    // Sort regulators according to their decreasing score
                    Collections.sort(reg_list, Collections.reverseOrder());
                    for (RegData r : reg_list) {
                    	int n = mod.number + inc;
                    	//String line = r.name + "\t" + r.type + "\t" + n + "\t" + String.format("%10.6f", r.weight);
                    	String line = r.name + "\t" + n + "\t" + String.format("%10.6f", r.weight);
                    	pw.println(line);
                    }
                }
            }
            pw.close();
            System.out.println(" [ok]");
        } catch (IOException e){
        	System.out.println("Error while trying to create the output file");
            System.out.println(e);
            e.printStackTrace();
        }
	}

	
	
	/**
	 * Replace current gene names with new ones in the gene set
	 * @param map HashMap current gene names -> new gene names.
	 * 
	 * @author erbon
	 */
	public void changeGeneNames (String fileName) {
		
		HashMap<String, String> map = new HashMap<String, String>();
		try {
			BufferedReader buf = new BufferedReader(new FileReader(fileName));
			String line;
			while((line = buf.readLine()) != null) {
				if (!line.startsWith("#")) {
					line.trim();
					String tk[] = line.split("\\s+|\\t");
					map.put(tk[0], tk[1]);
				}
			}
		}
		catch (FileNotFoundException e) {
			System.out.println("Error: file "+fileName+" not found");
			System.exit(1);
		}
		catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		}
		
		// replace all gene names 
		for (Gene g : this.geneSet) {
			String key = g.name;
			String new_name = map.get(key);
			if (new_name != null)
				g.name = map.get(key);
			else
				System.out.println("Warning: no gene name found for " + g.name);
		}
	}
	
	/**
	 * Set values for the normal Gamma priors.
	 * 
	 * @param lambda0
	 * @param mu0
	 * @param alpha0
	 * @param beta0
	 * 
	 * @author eric
	 */
	public void setNormalGammaPriors(double lambda0, double mu0, double alpha0, double beta0) {
		// normal Gamma priors
		this.normalGammaPrior[0] = lambda0;
		this.normalGammaPrior[1] = mu0;
		this.normalGammaPrior[2] = alpha0;
		this.normalGammaPrior[3] = beta0;
	}
	
	/**
	 * Initialize statistics and score for a module network.
	 * Data matrix, clusters and regulators should be set.
	 *  
	 *  @author eric
	 */
	public void initStatisticsAndScore() {
		
		// compute initial statistics and score
		ArrayList<Integer> allConds = new ArrayList<Integer>();
		for (int i = 0; i < this.numCond; i++)
			allConds.add(i);
		for (Module module : this.moduleSet) {
			module.hierarchicalTree = new TreeNode(module, this.normalGammaPrior);
			module.hierarchicalTree.leafDistribution.condSet = allConds;
			module.hierarchicalTree.statistics();
			module.moduleScore = module.hierarchicalTree.bayesianScore();
			module.moduleNetwork = this;
		}
		// calculate the score of the initial network
		this.bayesianScore();

	}
	
	/**
	 * Calculate mean, standard deviation, min and max values for the dataset, using module genes
	 * 
	 * @author eric
	 */
	public void setDataMeanAndSDFromModuleset() {

		double nrDataPoints = 0;
		double sumSquare = 0.0;
		this.dataMean = 0.0;
		this.dataSigma = 0.0;
		this.dataMin = Double.POSITIVE_INFINITY;
		this.dataMax = Double.NEGATIVE_INFINITY;
//		for (int i = 0; i < this.data.length; i++) {
//			for (int j = 0; j < this.data[i].length; j++) {
//				if (!Double.isNaN(data[i][j])) {
//					this.dataMean += data[i][j];
//					sumSquare += Math.pow(data[i][j], 2);
//					nrDataPoints += 1;
//					if (data[i][j] < this.dataMin)
//						this.dataMin = data[i][j];
//					else if (data[i][j] > this.dataMax)
//						this.dataMax = data[i][j];
//				}
//			}
//		}
		
		for (Module mod : this.moduleSet) {
			for (Gene g : mod.genes) {
				int i = g.number;
				for (int j = 0; j < this.data[i].length; j++) {
					if (!Double.isNaN(data[i][j])) {
						this.dataMean += data[i][j];
						sumSquare += Math.pow(data[i][j], 2);
						nrDataPoints += 1;
						if (data[i][j] < this.dataMin)
							this.dataMin = data[i][j];
						else if (data[i][j] > this.dataMax)
							this.dataMax = data[i][j];
					}
				}
			}
		}

		this.dataMean /= (double) nrDataPoints;
		this.dataSigma = Math.sqrt(sumSquare - nrDataPoints * Math.pow(this.dataMean, 2)) / Math.sqrt((double) nrDataPoints);
		System.out.println("dataMean: " + dataMean);
		System.out.println("dataSigma: " + dataSigma);
		System.out.println("dataMin: " + dataMin);
		System.out.println("dataMax: " + dataMax);
	}
	
	  /**
	   * Set a map to the conditions. conditionSet should be defined.
	   * 
	   * @author eric
	   */
	  public void setConditionMap() {

	    conditionMap = new HashMap<String, Experiment>();

	    for (int i=0;i<conditionSet.size();i++) {
	      Experiment e = conditionSet.get(i);
	      conditionMap.put(e.name, e);
	    }
	  }

}
